We begin by loading the packages and sourcing the functions that we will need.

# Data visualisation
library(ggplot2)
library(ggpubr)     # ggarrange() for multi-panel figures
library(scales)
library(ggnewscale) # new_scale_fill() to use two fill scales in one plot
theme_set(theme_minimal(base_size = 13))

# Modelling packages
library(unmarked)   # occu() for the discrete-time occupancy models

# Data management
# NOTE: tidyverse is loaded after unmarked so that dplyr verbs take
# precedence in case of name conflicts.
library(tidyverse)
library(lubridate)

# Source all custom functions
# (provides helpers used below, e.g. myDT() for interactive tables)
source("./utils/comparison_of_occupancy_models.R")

# Locale
# Force an English locale so month names in date labels render in English
Sys.setenv(LANG = "en_US.UTF-8")
Sys.setenv("LANGUAGE" = "EN")

Data curation

Download and prepare data

The data was presented in Gimenez et al. (2022) and is available in the git repository associated with this publication. In this comparison, we only use data from the Ain county.

We first download and load the data set.

# Create the local data directory when it is missing
if (!dir.exists("./data/")) dir.create("./data/")

# Fetch the Ain county data set unless the file is already on disk
if (!file.exists("./data/metadata_Ain.RData")) {
  download.file(
    url = "https://github.com/oliviergimenez/computo-deeplearning-occupany-lynx/raw/master/dat/metadata_Ain.RData",
    destfile = "./data/metadata_Ain.RData"
  )
}

# Load data from Ain: this creates the `allfiles` data frame
load('./data/metadata_Ain.RData')
allpic <- allfiles %>%
  mutate(Keywords = as.character(observed)) %>% # pick manual tags
  # Parse the picture timestamps; the impossible date "2019:02:29" is
  # patched before parsing (see note below)
  mutate(DateTime = ymd_hms(str_replace(DateTimeOriginal, "2019:02:29", "2019:03:01"))) %>%
  mutate(FileName = pix) %>% 
  select(FileName, Directory, Keywords, DateTime)
rm(allfiles)

# Note: 4 rows failed to parse when we used "ymd_hms(DateTimeOriginal)"
#       because some dates in DateTimeOriginal are 2019:02:29
#       (format %Y:%m:%d) and February only had 28 days in 2019
#       (not a leap year)...
# Since we will not use them, because they are not lynx detections,
# we don't have to investigate this further. 
# We just replaced 2019:02:29 by 2019:03:01 because it's the most likely error.

# Add SiteID: the site identifier is the part of the file name before
# the first underscore (e.g. "15.1_xxx" -> "15.1")
allpic$SiteID = gsub("_.*$", "", allpic$FileName)

# Remove columns that we will not use (drops Directory)
allpic = allpic %>% 
  select(SiteID, FileName, Keywords, DateTime)

Visualise various species detections

We will only focus on the lynx here. But we’ll still look a bit more into our data set.

## We list all the labels that are used.
## Keywords are either a bare label (no double quotes) or a string of
## several quoted labels; both cases are handled:
##  - unquoted Keywords are kept as-is,
##  - quoted Keywords are split by extracting every "..." group and
##    stripping the quotes.
(all_labels <- sort(unique(c(
 allpic$Keywords[!grepl('"', allpic$Keywords)], unname(unlist(sapply(unique(allpic$Keywords), function(x) {
   gsub('"', '', regmatches(x, gregexpr('"([^"]*)"', x))[[1]])
 })))
))))
 [1] "blaireaux" "cavalier"  "cerf"      "chamois"   "chasseur"  "chat"     
 [7] "chevreuil" "chien"     "ecureuil"  "fouine"    "humain"    "lievre"   
[13] "lynx"      "martre"    "oiseaux"   "renard"    "sangliers" "vaches"   
[19] "vehicule" 
# Grouping some labels
# Synonyms / alternative spellings pooled together (tags are in French,
# with and without accents)
human_labels = c("cavalier", "chasseur", "chien", "Fréquentation humaine", "humain", "vehicule", "véhicule")
squirrel_labels = c("ecureuil", "écureuil", "Sciuridae")
small_mustelids_labels = c("fouine", "martre")
lagomorph_labels = c("laporidés", "lievre", "lièvre")

## All the sites and deployment times
# + I added the number of detection events per species
# Each Nb* flag below is a per-picture logical (substring match on the
# Keywords string), so sum(.x) in summarise() counts PICTURES showing
# the species at each site.
allsites = allpic %>%
  mutate(
    NbLynx = grepl(pattern = "lynx", x = Keywords),
    NbHuman = sapply(Keywords, function(lab) {any(grepl(paste(human_labels, collapse = "|"), lab))}),
    NbBadger = grepl(pattern = "blaireaux", x = Keywords),
    NbRedDeer = grepl(pattern = "cerf", x = Keywords),
    NbChamois = grepl(pattern = "chamois", x = Keywords),
    NbRoeDeer = grepl(pattern = "chevreuil", x = Keywords), 
    NbSquirrel = sapply(Keywords, function(lab) {any(grepl(paste(squirrel_labels, collapse = "|"), lab))}),
    NbSmallMustelid = sapply(Keywords, function(lab) {any(grepl(paste(small_mustelids_labels, collapse = "|"), lab))}),
    NbLagomorph = sapply(Keywords, function(lab) {any(grepl(paste(lagomorph_labels, collapse = "|"), lab))}),
    NbFox = grepl(pattern = "renard", x = Keywords), 
    NbWildBoar = grepl(pattern = "sangliers", x = Keywords),
    NbWildcat = grepl(pattern = "chat forestier", x = Keywords)
  ) %>%
  group_by(SiteID) %>%
  dplyr::summarise(
    # Deployment period = first to last picture of any species
    FirstPic = min(DateTime),
    LastPic = max(DateTime),
    across(starts_with("Nb"), ~ sum(.x)),
    # NOTE(review): .groups = "keep" leaves the result grouped by SiteID
    .groups = "keep"
  ) %>% 
  mutate(DeploymentDuration = LastPic - FirstPic) %>% 
  relocate(DeploymentDuration, .before = FirstPic)

# Interactive table of deployment periods and per-species picture counts
# (myDT() is a custom helper sourced at the top of the script)
allsites %>% 
  mutate(DeploymentDuration = round(DeploymentDuration, 2)) %>%
  mutate(across(where(is.character), \(x) as.factor(x))) %>% 
  mutate(across(where(is.numeric), \(x) round(x, 2))) %>%
  myDT(caption = "Monitoring periods and detections per site", pageLength=11)
  • The total number of camera-trap days is 5396.35221064815.
  • The lynx was detected 203 times in total.

Let’s see how many detections there are per site and per species.

# One histogram panel per species: distribution over sites of the number
# of pictures with that species. The panel title also reports how many
# sites never photographed the species.
allsites %>% 
  tidyr::pivot_longer(cols = starts_with("Nb"),
               names_to = c("Species"),
               names_prefix = "Nb",
               values_to = "NbDetections") %>% 
  dplyr::group_by(Species) %>% 
  dplyr::mutate(SpeciesTxt = paste0(
    # Split the CamelCase species name on capitals, then lowercase the
    # word right after the first space (e.g. "RedDeer" -> "Red deer")
    str_replace(gsub("(?<!^)(?=[A-Z])", " ", Species, perl=TRUE), "\\s(\\w+)", function(x) {tolower(x)}),
    "\n",
    sum(NbDetections == 0), "/", length(unique(allsites$SiteID)),
    " site", ifelse(sum(NbDetections == 0) > 1, "s", ""),
    " with no detection"
  )) %>% 
  ggplot() +
  geom_histogram(aes(x = NbDetections, y = after_stat(count), fill = NbDetections > 0), binwidth = 1) +
  geom_density(aes(x = NbDetections, y = after_stat(count))) +
  facet_wrap(SpeciesTxt ~ ., scales = "free") +
  labs(x = "Number of images with the species",
       y = "Number of sites")+
  theme(legend.position = "none")

Format the lynx data

For the rest of the analysis, we will only keep the data for the lynx.

# Keep only the pictures in which a lynx was tagged
LynxPic = allpic[grepl("lynx", allpic$Keywords), ]

Data discretisation

For the BP and COP models, we have to discretise data into a matrix per session of detection/non-detection (BP) or counts (COP). We’ll use the same discretisation intervals as in the simulation study: by month, week and day.

# Beginning of the sessions
# Session boundaries span the whole study period, aligned on calendar months
BeginDateTime = min(allsites$FirstPic)
EndDateTime = max(allsites$LastPic)

# Month sessions
MonthSessions = seq(
  floor_date(BeginDateTime, unit = "month"),
  ceiling_date(EndDateTime, unit = "month"),
  by = "months"
)
# n boundaries define n-1 intervals, hence the last label is dropped in cut().
# NOTE(review): format.POSIXct() has no `locale` argument; it appears to be
# silently ignored via `...`, so labels follow the session locale set at the
# top of the script -- confirm.
MonthSessionsLabels = format(MonthSessions, "%b %y", locale = "en_GB")
LynxPic$MonthSession = cut(
  LynxPic$DateTime,
  breaks = MonthSessions,
  labels = MonthSessionsLabels[-length(MonthSessionsLabels)],
  include.lowest = FALSE
)

# Week sessions
# (weekly steps, but starting from the same month-aligned origin)
# NOTE(review): seq() stops at the last weekly step <= the month ceiling,
# so a few trailing days may fall after the last break and get NA -- verify.
WeekSessions = seq(
  floor_date(BeginDateTime, unit = "month"),
  ceiling_date(EndDateTime, unit = "month"),
  by = "week"
)
WeekSessionsLabels = format(WeekSessions, "%Y week %U", locale = "en_GB")
LynxPic$WeekSession = cut(
  LynxPic$DateTime,
  breaks = WeekSessions,
  labels = WeekSessionsLabels[-length(WeekSessionsLabels)],
  include.lowest = FALSE
)

# Day sessions
DaySessions = seq(
  floor_date(BeginDateTime, unit = "month"),
  ceiling_date(EndDateTime, unit = "month"),
  by = "day"
)
DaySessionsLabels = format(DaySessions, "%d %B %Y", locale = "en_GB")
LynxPic$DaySession = cut(
  LynxPic$DateTime,
  breaks = DaySessions,
  labels = DaySessionsLabels[-length(DaySessionsLabels)],
  include.lowest = FALSE
)

# Each lynx photo now carries its month, week and day session
# (FileName and Keywords are not needed for this preview)
print(select(LynxPic, -FileName, -Keywords))
# A tibble: 203 x 5
   SiteID DateTime            MonthSession WeekSession  DaySession     
   <chr>  <dttm>              <fct>        <fct>        <fct>          
 1 15.1   2017-10-01 07:17:59 Oct 17       2017 week 40 01 October 2017
 2 15.2   2018-07-23 08:00:27 Jul 18       2018 week 29 23 July 2018   
 3 15.2   2018-07-23 08:00:32 Jul 18       2018 week 29 23 July 2018   
 4 15.2   2018-10-18 11:20:11 Oct 18       2018 week 41 18 October 2018
 5 15.2   2018-07-23 08:00:29 Jul 18       2018 week 29 23 July 2018   
 6 15.2   2018-07-24 11:26:00 Jul 18       2018 week 29 24 July 2018   
 7 15.2   2018-07-25 16:03:00 Jul 18       2018 week 29 25 July 2018   
 8 15.2   2018-07-23 08:00:36 Jul 18       2018 week 29 23 July 2018   
 9 15.2   2018-07-25 19:25:16 Jul 18       2018 week 29 25 July 2018   
10 15.2   2018-08-19 08:11:40 Aug 18       2018 week 33 19 August 2018 
# i 193 more rows

Data formatting for continuous-time models

We will reformat the data into lists, in the same format that we used for the simulation study, in order to reuse the same functions.

# Number of sites
NbSites <- length(allsites$SiteID)

# List of the detection times
# format: detection_times[[site i]][[deployment j]] -> vector of detection times
# Because we only have one deployment per site, the format is:
#  detection_times[[site i]][[1]] -> vector of detection times
#
#  Detection times are numerics, between 0 and the latest detection,
#  in the chosen time-unit (days)

# list_T_ij
# Duration of deployment j at site i
# list_T_ij[[site i]] -> vector of the R_i duration of deployment(s) at site i

# Initialisation detection_times
detection_times <- vector(mode = "list", length = NbSites)
names(detection_times) <- allsites$SiteID

# Initialisation list_T_ij
list_T_ij <- vector(mode = "list", length = NbSites)
names(list_T_ij) <- allsites$SiteID

# seq_len() rather than 1:NbSites (safe for NbSites == 0)
for (i in seq_len(NbSites)) {
  i_siteID <- names(detection_times)[i]

  # Deployment period of this site (filter once, reuse both columns)
  i_site <- allsites %>% filter(SiteID == i_siteID)
  i_BeginTime <- i_site$FirstPic
  i_EndTime <- i_site$LastPic

  # Date and time of the lynx detections at this site, in chronological order
  i_DetecDateTime <- sort(LynxPic %>%
         filter(SiteID == i_siteID) %>%
         pull(DateTime))

  # Days elapsed between the beginning of the deployment and each detection
  i_DetecTimeDays <- as.numeric(
    difftime(i_DetecDateTime, i_BeginTime, units = "days")
  )

  # Single deployment per site, hence the one-element inner list "1"
  detection_times[[i]] <- list("1" = i_DetecTimeDays)

  # Duration of the deployment (in days too)
  list_T_ij[[i]] <- as.numeric(
    difftime(i_EndTime, i_BeginTime, units = "days")
  )
}

# list_R_i is the number of deployments at sites
# (here 1 deployment only in all sites)
list_R_i <- rep(1, NbSites)

Our continuous data is now formatted in lists. detection_times is the time of detection in days since the deployment began, which is, for the first three sites:

# Peek at the continuous-time detection data for the first three sites
print(detection_times[1:3]) # Time of detection in days since the deployment began
$`15.1`
$`15.1`$`1`
[1] 61.79045


$`15.2`
$`15.2`$`1`
 [1] 101.6343 109.1934 228.1474 285.2514 292.5099 305.3569 330.4648 396.7233
 [9] 396.7234 396.7234 396.7234 397.0485 397.2232 397.5470 397.8661 398.3658
[17] 398.9069 399.0584 399.1989 399.5581 399.6976 423.7311 428.4760 464.9331
[25] 483.8620 539.2772 549.5200 569.6709 572.1523 575.1036 595.1144 596.7564
[33] 620.2774 625.4375


$`22.1`
$`22.1`$`1`
[1]   2.347917   9.562002 126.644248 160.734745 165.742106 165.742106 165.779595

And list_T_ij stores the deployment duration in days, which is, for the first three sites:

# Peek at the deployment durations for the first three sites
print(list_T_ij[1:3]) # Deployment duration in days
$`15.1`
[1] 183.5267

$`15.2`
[1] 656.9314

$`22.1`
[1] 184.0091

Visualise the detection history

We can now visualise the detection history of the lynx as well as the monitoring period per site.

# For the paper, we rename sites.
# SiteID has the form "<left>.<right>" (e.g. "15.2"). The left part is
# ranked by its total number of lynx pictures (ascending) and relabelled
# "A", "B", "C", ... so that "A" has the fewest lynx pictures.
# Assumes at most 26 distinct left parts (LETTERS) -- TODO confirm.
SiteID_2_SiteIDNew = left_join(
  (
    allsites %>%
      tidyr::separate(
        SiteID,
        into = c("SiteIDleft", "SiteIDright"),
        sep = "\\.",
        remove = FALSE
      )
  ),
  (
    allsites %>%
      tidyr::separate(
        SiteID,
        into = c("SiteIDleft", "SiteIDright"),
        sep = "\\.",
        remove = FALSE
      ) %>%
      dplyr::group_by(SiteIDleft) %>%
      dplyr::summarise(NbLynxTot = sum(NbLynx)) %>%
      dplyr::arrange(NbLynxTot) %>%
      # `.` is the data frame entering mutate() at this pipeline stage
      dplyr::mutate(SiteIDleftNew = LETTERS[1:nrow(.)])
  ),
  by = "SiteIDleft"
) %>%
  mutate(SiteIDNew = paste0(SiteIDleftNew, SiteIDright)) %>%
  select(SiteID, SiteIDNew)

Continuous

# Continuous-time detection history:
#  - grey segments = monitoring period of each (renamed) site
#  - black crosses = individual lynx pictures
# The constant colour/fill aesthetics only serve to produce a legend.
LynxDetecHistory_continuous = ggplot(data = (
  allsites %>%
    left_join(LynxPic, by = "SiteID") %>%
    left_join(SiteID_2_SiteIDNew, by = "SiteID")
),
aes(x = DateTime, y = SiteIDNew)) +
  geom_segment(aes(
    x = FirstPic,
    xend = LastPic,
    yend = SiteIDNew,
    color = "Monitoring period"
  ), linewidth = 2.5) +
  geom_point(aes(fill = "Detection event"), shape = 4) +
  scale_x_datetime(date_breaks = "3 month" , date_labels = "%b %y") +
  scale_colour_manual(values = c("Monitoring period" = "grey"), name = "") +
  scale_fill_manual(values = c("Detection event" = "black"), name = "") +
  labs(title = "Lynx detection history") +
  theme(
    axis.text.x = element_text(
      angle = 90,
      vjust = 0.5,
      hjust = 0.3
    ),
    axis.title = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom"
  )
print(LynxDetecHistory_continuous)

# Export
if (!dir.exists('./output/')) {
  dir.create('./output/')
}
# Vector (PDF) version for the paper; the in-plot title is dropped
# because the figure gets a caption there.
ggsave(
  "./output/lynx_detection_history_continuous.pdf",
  plot = (
    LynxDetecHistory_continuous +
      theme(plot.title = element_blank())
  ),
  width = 23,
  height = 10,
  units = "cm"  # full argument name ("unit" only worked via partial matching)
)
# Raster (JPEG) version
jpeg(
  filename = "./output/lynx_detection_history_continuous.jpeg",
  width = 23,
  height = 10,
  units = "cm",  # full argument name ("unit" only worked via partial matching)
  res = 100
)
print(
  LynxDetecHistory_continuous +
    theme(plot.title = element_blank())
)
dev.off()
png 
  2 

Discretised per day

# One row per daily session with its label and datetime boundaries.
# NOTE(review): seconds(1e-16) is far below POSIXct precision, so it
# subtracts nothing and DaySessionEnd equals the NEXT session's begin;
# presumably a small positive offset (e.g. seconds(1)) was intended,
# cf. days(1) in the weekly/monthly tables -- confirm. The overlap is a
# single instant, so the effect on MonitoringTime is negligible.
DayTable = as_tibble(data.frame("DaySession" = cut(
    DaySessions,
    breaks = DaySessions,
    labels = DaySessionsLabels[-length(DaySessionsLabels)],
    include.lowest = FALSE
  )[-length(DaySessions)],
  "DaySessionBegin" = DaySessions[-length(DaySessions)] + seconds(0),
  "DaySessionEnd" = DaySessions[-1] - seconds(1e-16)
))

# One row per (site, daily session):
#  - MonitoringTime: overlap between the session and the deployment
#    (0 when the camera was not deployed at all during the session)
#  - FullyMonitored: the deployment covers the whole session
#  - PartiallyMonitored: some, but not all, of the session is covered
#  - NbDetec: number of lynx pictures in fully monitored sessions,
#    NA otherwise (incomplete sessions are discarded)
DayLongDF <- expand.grid(
  "SiteID" = unique(allsites$SiteID),
  "DaySession" = DayTable$DaySession) %>%
  as_tibble() %>% 
  left_join(allsites,
            by = "SiteID") %>% 
  left_join(DayTable, by = "DaySession") %>% 
  mutate(
    MonitoringTime = replace_na(as.duration(
      lubridate::intersect(
        lubridate::interval(DaySessionBegin, DaySessionEnd),
        lubridate::interval(FirstPic, LastPic)
      )
    ), as.duration(seconds(0))),
    FullyMonitored = (FirstPic <= DaySessionBegin &
                        LastPic >= DaySessionEnd),
    PartiallyMonitored = !FullyMonitored & (MonitoringTime > 0) &
      (MonitoringTime < max(MonitoringTime, na.rm = T))
  ) %>% 
  left_join((
    LynxPic %>%
      group_by(SiteID, DaySession) %>%
      dplyr::summarise(n = n(), .groups = "keep")
  ),
  by = c("SiteID", "DaySession")) %>% 
  mutate(NbDetec = ifelse(FullyMonitored, ifelse(is.na(n), 0, n), NA))


# Sites x daily-sessions count matrix (NA = session not fully monitored)
DayCountMatrix <- DayLongDF %>% 
  select(SiteID, DaySession, NbDetec) %>% 
  pivot_wider(names_from = "DaySession", 
              values_from = "NbDetec") %>% 
  column_to_rownames("SiteID")


# Daily detection-history heatmap, built with two fill scales:
#  - 1st layer: gradient for sessions with >= 1 detection (0 counts are
#    recoded to NA so they become transparent here)
#  - 2nd layer (after new_scale_fill): red for monitored sessions with no
#    detection, white for sessions that were not fully monitored
LynxDetecHistory_day = ggplot() +
  geom_tile(
    data = DayLongDF,
    aes(
      x = as.Date(DaySessionBegin),
      y = SiteID,
      fill = ifelse(NbDetec == 0, NA, NbDetec)
    )
  ) +
  scale_fill_gradient(
    low = "#93c47d",
    high = "#274e13",
    name = "Detections",
    breaks = round(seq(
      from = 1,
      to = max(DayLongDF$NbDetec, na.rm = T),
      length.out = 4
    )),
    na.value = "transparent"
  ) +
  new_scale_fill() +
  geom_tile(
    data = (
      DayLongDF %>%
        filter(is.na(NbDetec) | NbDetec == 0) %>%
        mutate(NbDetec0NA = ifelse(
          is.na(NbDetec), "Not monitored", "No detection"
        ))
    ),
    aes(
      x = as.Date(DaySessionBegin),
      y = SiteID,
      fill = NbDetec0NA
    )
  ) +
  scale_fill_manual(
    values = c(
      "No detection" = "#ba3c3c",
      "Not monitored" = "white"
    ),
    name = ""
  ) +
  scale_x_date(date_breaks = "3 month" , date_labels = "%B %Y") +
  labs(title = "Lynx daily detection history") +
  theme(
    axis.text.x = element_text(
      angle = 90,
      vjust = 0.5,
      hjust = 1
    ),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    legend.position = "bottom"
  )
print(LynxDetecHistory_day)

  • 22 sessions were dropped because they were incomplete.
  • This corresponds to a total of 10d 8H 27M 11S of monitored time that was discarded.
# Per-site monitoring time that fell into incomplete (discarded) daily sessions
DayLongDF %>%
  dplyr::filter(PartiallyMonitored) %>%
  dplyr::group_by(SiteID) %>%
  dplyr::summarise(
    `Discarded (in days)` = as.character(as.period(as.duration(sum(MonitoringTime))))
  ) %>%
  myDT(caption = "Monitored but discarded time per site with daily sessions", pageLength = 11)

Discretised per week

# For each WEEKLY session (comment fixed: this section is the weekly
# discretisation), we check if each site was monitored during the entire
# session ("FullyMonitored = TRUE") or not. If the site was not monitored
# during the entire session, the observation will be NA.

# One row per weekly session with its label and boundaries
# (the session ends one day before the next break)
WeekTable = data.frame("WeekSession" = cut(
    WeekSessions,
    breaks = WeekSessions,
    labels = WeekSessionsLabels[-length(WeekSessionsLabels)],
    include.lowest = FALSE
  )[-length(WeekSessions)],
  "WeekSessionBegin" = WeekSessions[-length(WeekSessions)],
  "WeekSessionEnd" = WeekSessions[-1] - days(1)
)

# One row per (site, weekly session); same logic as the daily table:
# MonitoringTime = session/deployment overlap, NbDetec = lynx pictures in
# fully monitored sessions (NA for incomplete sessions)
WeekLongDF <- expand.grid(
  "SiteID" = unique(allsites$SiteID),
  "WeekSession" = WeekTable$WeekSession) %>%
  as_tibble() %>% 
  left_join(allsites,
            by = "SiteID") %>% 
  left_join(WeekTable, by = "WeekSession") %>% 
  mutate(
    MonitoringTime = replace_na(as.duration(
      lubridate::intersect(
        lubridate::interval(WeekSessionBegin, WeekSessionEnd),
        lubridate::interval(FirstPic, LastPic)
      )
    ), as.duration(seconds(0))),
    FullyMonitored = (FirstPic <= WeekSessionBegin &
                        LastPic >= WeekSessionEnd),
    PartiallyMonitored = !FullyMonitored & (MonitoringTime > 0) &
      (MonitoringTime < max(MonitoringTime, na.rm = T))
  ) %>% 
  left_join((
    LynxPic %>%
      group_by(SiteID, WeekSession) %>%
      dplyr::summarise(n = n(), .groups = "keep")
  ),
  by = c("SiteID", "WeekSession")) %>% 
  mutate(NbDetec = ifelse(FullyMonitored, ifelse(is.na(n), 0, n), NA))

# Sites x weekly-sessions count matrix (NA = session not fully monitored)
WeekCountMatrix <- WeekLongDF %>% 
  select(SiteID, WeekSession, NbDetec) %>% 
  pivot_wider(names_from = "WeekSession", 
              values_from = "NbDetec") %>% 
  column_to_rownames("SiteID")


# Weekly detection-history heatmap; same two-fill-scale construction as
# the daily plot (gradient for counts, red/white for zero/unmonitored)
LynxDetecHistory_week = ggplot() +
  geom_tile(
    data = WeekLongDF,
    aes(
      x = as.Date(WeekSessionBegin),
      y = SiteID,
      fill = ifelse(NbDetec == 0, NA, NbDetec)
    ), colour = "grey"
  ) +
  scale_fill_gradient(
    low = "#93c47d",
    high = "#274e13",
    name = "Detections",
    breaks = round(seq(
      from = 1,
      to = max(WeekLongDF$NbDetec, na.rm = T),
      length.out = 4
    )),
    na.value = "transparent"
  ) +
  new_scale_fill() +
  geom_tile(
    data = (
      WeekLongDF %>%
        filter(is.na(NbDetec) | NbDetec == 0) %>%
        mutate(NbDetec0NA = ifelse(
          is.na(NbDetec), "Not monitored", "No detection"
        ))
    ),
    aes(
      x = as.Date(WeekSessionBegin),
      y = SiteID,
      fill = NbDetec0NA
    ), colour = "grey"
  ) +
  scale_fill_manual(
    values = c(
      "No detection" = "#ba3c3c",
      "Not monitored" = "white"
    ),
    name = ""
  ) +
  scale_x_date(date_breaks = "3 month" , date_labels = "%B %Y") +
  labs(title="Lynx weekly detection history")+
  theme(
    axis.text.x = element_text(
      angle = 90,
      vjust = 0.5,
      hjust = 1
    ),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    legend.position = "bottom"
  )
print(LynxDetecHistory_week)

  • 22 sessions were dropped because they were incomplete.
  • This corresponds to a total of 65d 8H 27M 11S of monitored time that was discarded.
# Per-site monitoring time that fell into incomplete (discarded) weekly sessions
WeekLongDF %>%
  dplyr::filter(PartiallyMonitored) %>%
  dplyr::group_by(SiteID) %>%
  dplyr::summarise(
    `Discarded (in days)` = as.character(as.period(as.duration(sum(MonitoringTime))))
  ) %>%
  myDT(caption = "Monitored but discarded time per site with weekly sessions", pageLength = 11)

Discretised per month

# For each monthly session, we check if each site was monitored during the
# entire session ("FullyMonitored = TRUE") or not. If the site was not
# monitored during the entire session, the observation will be NA.

# One row per monthly session with its label and boundaries
# (the session ends one day before the next break)
MonthTable = data.frame("MonthSession" = cut(
    MonthSessions,
    breaks = MonthSessions,
    labels = MonthSessionsLabels[-length(MonthSessionsLabels)],
    include.lowest = FALSE
  )[-length(MonthSessions)],
  "MonthSessionBegin" = MonthSessions[-length(MonthSessions)],
  "MonthSessionEnd" = MonthSessions[-1] - days(1)
)

# One row per (site, monthly session); same logic as the daily table:
# MonitoringTime = session/deployment overlap, NbDetec = lynx pictures in
# fully monitored sessions (NA for incomplete sessions)
MonthLongDF <- expand.grid(
  "SiteID" = unique(allsites$SiteID),
  "MonthSession" = MonthTable$MonthSession) %>%
  as_tibble() %>% 
  left_join(allsites,
            by = "SiteID") %>% 
  left_join(MonthTable, by = "MonthSession") %>% 
  mutate(
    MonitoringTime = replace_na(as.duration(
      lubridate::intersect(
        lubridate::interval(MonthSessionBegin, MonthSessionEnd),
        lubridate::interval(FirstPic, LastPic)
      )
    ), as.duration(seconds(0))),
    FullyMonitored = (FirstPic <= MonthSessionBegin &
                        LastPic >= MonthSessionEnd),
    PartiallyMonitored = !FullyMonitored & (MonitoringTime > 0) &
      (MonitoringTime < max(MonitoringTime, na.rm = T))
  ) %>% 
  left_join((
    LynxPic %>%
      group_by(SiteID, MonthSession) %>%
      dplyr::summarise(n = n(), .groups = "keep")
  ),
  by = c("SiteID", "MonthSession")) %>% 
  mutate(NbDetec = ifelse(FullyMonitored, ifelse(is.na(n), 0, n), NA))

# Sites x monthly-sessions count matrix (NA = session not fully monitored)
MonthCountMatrix <- MonthLongDF %>% 
  select(SiteID, MonthSession, NbDetec) %>% 
  pivot_wider(names_from = "MonthSession", 
              values_from = "NbDetec") %>% 
  column_to_rownames("SiteID")


# Monthly detection-history heatmap; same two-fill-scale construction as
# the daily plot (gradient for counts, red/white for zero/unmonitored)
LynxDetecHistory_month = ggplot() +
  geom_tile(
    data = MonthLongDF,
    aes(
      x = as.Date(MonthSessionBegin),
      y = SiteID,
      fill = ifelse(NbDetec == 0, NA, NbDetec)
    ), colour = "grey"
  ) +
  scale_fill_gradient(
    low = "#93c47d",
    high = "#274e13",
    name = "Detections",
    breaks = round(seq(
      from = 1,
      to = max(MonthLongDF$NbDetec, na.rm = T),
      length.out = 4
    )),
    na.value = "transparent"
  ) +
  new_scale_fill() +
  geom_tile(
    data = (
      MonthLongDF %>%
        filter(is.na(NbDetec) | NbDetec == 0) %>%
        mutate(NbDetec0NA = ifelse(
          is.na(NbDetec), "Not monitored", "No detection"
        ))
    ),
    aes(
      x = as.Date(MonthSessionBegin),
      y = SiteID,
      fill = NbDetec0NA
    ), colour = "grey"
  ) +
  scale_fill_manual(
    values = c(
      "No detection" = "#ba3c3c",
      "Not monitored" = "white"
    ),
    name = ""
  ) +
  scale_x_date(date_breaks = "3 month" , date_labels = "%B %Y") +
  labs(title = "Lynx monthly detection history") +
  theme(
    axis.text.x = element_text(
      angle = 90,
      vjust = 0.5,
      hjust = 1
    ),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    legend.position = "bottom"
  )
print(LynxDetecHistory_month)

  • 15 sessions were dropped because they were incomplete.
  • This corresponds to a total of 243d 5H 54M 26S of monitored time that was discarded.
# Per-site monitoring time that fell into incomplete (discarded) monthly sessions
MonthLongDF %>%
  dplyr::filter(PartiallyMonitored) %>%
  dplyr::group_by(SiteID) %>%
  dplyr::summarise(
    `Discarded (in days)` = as.character(as.period(as.duration(sum(MonitoringTime))))
  ) %>%
  myDT(caption = "Monitored but discarded time per site with monthly sessions", pageLength = 11)

Plot with the different discretisations

# Largest per-session detection count across the three discretisations,
# used so the three panels can share one colour scale
MaxNbDetec_AllDiscretisations = max(
  c(DayLongDF$NbDetec, WeekLongDF$NbDetec, MonthLongDF$NbDetec),
  na.rm = TRUE
)


# Month ----

# Keep one x-axis label per trimester (positions 1, 4, 7, ...),
# blanking out the two months in between
MonthSessionsLabelsTrimester = ifelse(
  (seq_along(MonthSessionsLabels) - 1) %% 3 == 0,
  MonthSessionsLabels,
  ""
)

# Monthly panel of the combined figure. Layer order differs from the
# standalone plots: the red "no detection" tiles are drawn first (their
# legend key is a single " " entry, labelled through the legend TITLE),
# then the count gradient on top. reorder() keeps the x axis chronological
# while using the formatted month labels.
gg_Month = ggplot() +
  geom_tile(
    data = (
      MonthLongDF %>%
        left_join(SiteID_2_SiteIDNew, by = "SiteID") %>%
        filter(is.na(NbDetec) | NbDetec == 0) %>%
        mutate(NbDetec0NA = ifelse(is.na(NbDetec), NA, " "))
    ),
    aes(
      x = reorder(as.factor(format(
        as_date(MonthSessionBegin), "%b %y"
      )),
      as_date(MonthSessionBegin)),
      y = SiteIDNew,
      fill = NbDetec0NA
    )
  ) +
  scale_fill_manual(values = c(" " = "#ba3c3c"),
                    na.value = "transparent",
                    name = "No detection") +
  guides(fill = guide_legend(
    title.position = 'top',
    title.hjust = 0,
    title.vjust = -1
  )) +
  new_scale_fill() +
  geom_tile(data = (MonthLongDF %>% left_join(SiteID_2_SiteIDNew, by = "SiteID")),
            aes(
              x = reorder(as.factor(format(
                as_date(MonthSessionBegin), "%b %y"
              )),
              as_date(MonthSessionBegin)),
              y = SiteIDNew,
              fill = ifelse(NbDetec == 0, NA, NbDetec)
            )) +
  # Shared limits across the three panels (see MaxNbDetec_AllDiscretisations)
  scale_fill_gradient(
    low = "#93c47d",
    high = "#274e13",
    name = "Detection(s)",
    breaks = round(seq(
      from = 1,
      to = MaxNbDetec_AllDiscretisations,
      length.out = 4
    )),
    limits = c(1, MaxNbDetec_AllDiscretisations),
    na.value = "transparent"
  ) +
  guides(fill = guide_colourbar(
    title.position = 'top',
    title.hjust = 0,
    title.vjust = -1,
    barheight = .5
  )) +
  scale_x_discrete(breaks = MonthSessionsLabels, labels = MonthSessionsLabelsTrimester) +
  labs(title = "(a) Monthly discretisation") +
  theme(    axis.text.x = element_text(
      angle = 90,
      vjust = 0.5,
      hjust = 1
    ),
    axis.title = element_blank(),
    plot.title = element_text(size = 12, hjust = .5),
    panel.grid.major.x = element_blank(),
    legend.position = "bottom",
    legend.title = element_text(size = 12),
    legend.title.align = 0.5
  )

# Week ----
# Month label for each weekly break: keep only the first week of each
# month, restricted to the months shown on the monthly panel's axis
WeekSessionsLabelsTrimester = format(WeekSessions, "%b %y")
WeekSessionsLabelsTrimester = replace(
  WeekSessionsLabelsTrimester,
  duplicated(WeekSessionsLabelsTrimester),
  ""
)
WeekSessionsLabelsTrimester = replace(
  WeekSessionsLabelsTrimester,
  !WeekSessionsLabelsTrimester %in% MonthSessionsLabelsTrimester,
  ""
)

# Weekly panel of the combined figure (same construction as gg_Month)
gg_Week = ggplot() +
    geom_tile(
    data = (
      WeekLongDF %>%
        left_join(SiteID_2_SiteIDNew, by = "SiteID") %>%
        filter(is.na(NbDetec) | NbDetec == 0) %>%
        mutate(NbDetec0NA = ifelse(is.na(NbDetec), NA, "No detection"))
    ),
    aes(
      x =reorder(as.factor(format(as_date(WeekSessionBegin), "%Y week %U")),
                 as_date(WeekSessionBegin)),
      y = SiteIDNew,
      fill = NbDetec0NA
    )
  ) +
  scale_fill_manual(
    values = c("No detection" = "#ba3c3c"),
    na.value = "transparent", name = ""
  ) +
  new_scale_fill() +
  geom_tile(data = (WeekLongDF %>% left_join(SiteID_2_SiteIDNew, by = "SiteID")),
            aes(
              x = reorder(as.factor(format(as_date(WeekSessionBegin), "%Y week %U")),
                          as_date(WeekSessionBegin)),
              y = SiteIDNew,
              fill = ifelse(NbDetec == 0, NA, NbDetec)
            )) +
  # Shared limits across the three panels
  scale_fill_gradient(
    low = "#93c47d",
    high = "#274e13",
    name = "Detections",
    breaks = round(seq(
      from = 1,
      to = MaxNbDetec_AllDiscretisations,
      length.out = 4
    )),
    limits = c(1, MaxNbDetec_AllDiscretisations),
    na.value = "transparent"
  ) +
  scale_x_discrete(breaks = WeekSessionsLabels, labels = WeekSessionsLabelsTrimester) +
  labs(title = "(b) Weekly discretisation") +
  theme(    axis.text.x = element_text(
      angle = 90,
      vjust = 0.5,
      hjust = 1
    ),
    axis.title = element_blank(),
    plot.title = element_text(size = 12, hjust = .5),
    panel.grid.major.x = element_blank(),
    legend.position = "bottom"
  )

# Day ----
# Month label for each daily break: keep only the first day of each
# month, restricted to the months shown on the monthly panel's axis
DaySessionsLabelsTrimester = format(DaySessions, "%b %y")
DaySessionsLabelsTrimester = replace(
  DaySessionsLabelsTrimester,
  duplicated(DaySessionsLabelsTrimester),
  ""
)
DaySessionsLabelsTrimester = replace(
  DaySessionsLabelsTrimester,
  !DaySessionsLabelsTrimester %in% MonthSessionsLabelsTrimester,
  ""
)

# Daily panel of the combined figure (same construction as gg_Month)
gg_Day = ggplot() +
    geom_tile(
    data = (
      DayLongDF %>%
        left_join(SiteID_2_SiteIDNew, by = "SiteID") %>%
        filter(is.na(NbDetec) | NbDetec == 0) %>%
        mutate(NbDetec0NA = ifelse(is.na(NbDetec), NA, "No detection"))
    ),
    aes(
      x =reorder(as.factor(format(as_date(DaySessionBegin), "%d %B %Y")),
                 as_date(DaySessionBegin)),
      y = SiteIDNew,
      fill = NbDetec0NA
    )
  ) +
  scale_fill_manual(
    values = c("No detection" = "#ba3c3c"),
    na.value = "transparent", name = ""
  ) +
  new_scale_fill() +
  geom_tile(data = (DayLongDF %>% left_join(SiteID_2_SiteIDNew, by = "SiteID")),
            aes(
              x = reorder(as.factor(format(as_date(DaySessionBegin), "%d %B %Y")),
                          as_date(DaySessionBegin)),
              y = SiteIDNew,
              fill = ifelse(NbDetec == 0, NA, NbDetec)
            )) +
  # Shared limits across the three panels
  scale_fill_gradient(
    low = "#93c47d",
    high = "#274e13",
    name = "Detections",
    breaks = round(seq(
      from = 1,
      to = MaxNbDetec_AllDiscretisations,
      length.out = 4
    )),
    limits = c(1, MaxNbDetec_AllDiscretisations),
    na.value = "transparent"
  ) +
  scale_x_discrete(breaks = DaySessionsLabels, labels = DaySessionsLabelsTrimester) +
  labs(title = "(c) Daily discretisation")+
  theme(
    axis.text.x = element_text(
      angle = 90,
      vjust = 0.5,
      hjust = 1
    ),
    axis.title = element_blank(),
    plot.title = element_text(size = 12, hjust = .5),
    panel.grid.major.x = element_blank(),
    legend.position = "bottom"
  )


# Discrete ----
# Stack the three discretisation panels; only the bottom (daily) panel
# keeps its x-axis labels, and a single legend is shared at the bottom.
gg_Discrete = ggarrange(
  gg_Month + theme(axis.text.x = element_blank()),
  gg_Week + theme(axis.text.x = element_blank()),
  gg_Day + theme(axis.text.x = element_text(angle = 0, hjust = .5)),
  nrow = 3,
  heights = c(.8, .8, 1),
  common.legend = TRUE,  # TRUE rather than T (T can be reassigned)
  legend = "bottom"
)
print(gg_Discrete)

# Export the combined discrete-history figure (PDF for the paper, JPEG
# as a raster preview)
ggsave(
  "./output/lynx_detection_history_discrete.pdf",
  plot = gg_Discrete,
  width = 25,
  height = 18,
  units = "cm"  # full argument name ("unit" only worked via partial matching)
)
jpeg(
  filename = "./output/lynx_detection_history_discrete.jpeg",
  width = 25,
  height = 18,
  units = "cm",  # full argument name ("unit" only worked via partial matching)
  res = 100
)
print(gg_Discrete)
dev.off()
png 
  2 

Occupancy modelling for the lynx

We initialise the dataframe to store the result for all models.

# Accumulator: one row per (model, discretisation) combination, appended
# after each fit below
ModelComparisonDF <- data.frame()

BP with unmarked::occu

Discretised per month

We create an unmarkedFrame object.

# Detection/non-detection matrix: a session counts as a detection when at
# least one lynx picture was taken (count > 0).
# BUG FIX: the previous code used `> 1`, which coded sessions with exactly
# one detection as non-detections; detection/non-detection for the BP
# model means "at least one detection".
umf <- unmarkedFrameOccu(y = (as.matrix(MonthCountMatrix) > 0) * 1)
summary(umf)
unmarkedFrame Object

11 sites
Maximum number of observations per site: 29 
Mean number of observations per site: 15.36 
Sites with at least one detection: 8 

Tabulation of y observations:
   0    1 <NA> 
 118   51  150 

We calculate intuitive starting points for likelihood maximisation.

For the occupancy probability, we use the ratio of sites with at least one detection.

# Naive occupancy: proportion of sites with at least one detection
# (used as starting value for the likelihood maximisation)
(psi_init <- mean(rowSums(getY(umf), na.rm = TRUE) > 0))
[1] 0.7272727

For the detection probability, we focus on sites in which there was at least one detection and calculate the ratio of sessions with at least one detection.

# Naive detection probability: among sites with at least one detection,
# the proportion of (non-NA) sessions with a detection
(p_init <- mean(
  getY(umf)[rowSums(getY(umf), na.rm = TRUE) > 0,] > 0, 
  na.rm = T
))
[1] 0.375

We run the model.

# Fit the single-season occupancy model with constant detection and
# occupancy (double right-hand-side formula: ~ detection ~ occupancy),
# timing the fit for the model comparison.
beforetime = Sys.time()
MonthOccuMod <- occu(formula =  ~ 1 ~ 1,
     data = umf,
     method = "Nelder-Mead",
     # logit-scale starting values from the naive estimates above
     starts = c(qlogis(psi_init), qlogis(p_init))
)
aftertime = Sys.time()
print(MonthOccuMod)

Call:
occu(formula = ~1 ~ 1, data = umf, starts = c(qlogis(psi_init), 
    qlogis(p_init)), method = "Nelder-Mead")

Occupancy:
 Estimate    SE    z P(>|z|)
     1.11 0.742 1.49   0.136

Detection:
 Estimate    SE     z P(>|z|)
   -0.531 0.181 -2.94 0.00332

AIC: 196.3125 

The model running time is:

And we look at the estimates.

# Estimates
# Back-transform the logit-scale intercepts to probability scale
backTransform(MonthOccuMod, type = "state")
Backtransformed linear combination(s) of Occupancy estimate(s)

 Estimate    SE LinComb (Intercept)
    0.751 0.139    1.11           1

Transformation: logistic 
backTransform(MonthOccuMod, type = "det")
Backtransformed linear combination(s) of Detection estimate(s)

 Estimate     SE LinComb (Intercept)
     0.37 0.0421  -0.531           1

Transformation: logistic 
# 95% Confidence intervals
# (Wald intervals on the logit scale, mapped through plogis)
plogis(confint(MonthOccuMod, type = 'state', method = 'normal'))
             0.025     0.975
psi(Int) 0.4138047 0.9282099
plogis(confint(MonthOccuMod, type = 'det', method = 'normal'))
           0.025     0.975
p(Int) 0.2922106 0.4559926
# 50% Confidence intervals
plogis(confint(MonthOccuMod, type = 'state', method = 'normal', level = 0.50))
              0.25      0.75
psi(Int) 0.6468668 0.8328493
plogis(confint(MonthOccuMod, type = 'det', method = 'normal', level = 0.50))
            0.25      0.75
p(Int) 0.3424411 0.3992181

For the comparison, we store those results in a dataframe.

# Store the BP (month) estimates for the model comparison.
# Both psi and p use a logit link, so plogis() is the inverse link for all
# intervals. Each interval is computed once and reused instead of calling
# confint() twice per bound (8 calls reduced to 4).
psi_CI95 <- plogis(confint(MonthOccuMod, type = "state", method = "normal"))
psi_CI50 <- plogis(confint(MonthOccuMod, type = "state", method = "normal", level = 0.50))
p_CI95 <- plogis(confint(MonthOccuMod, type = "det", method = "normal"))
p_CI50 <- plogis(confint(MonthOccuMod, type = "det", method = "normal", level = 0.50))

ModelComparisonDF <- bind_rows(ModelComparisonDF, data.frame(
  "Model" = "BP",
  "Discretisation" = "Month",
  # psi (occupancy probability)
  "psi_TransformedPointEstimate" = unname(coef(MonthOccuMod)["psi(Int)"]),
  "psi_TransformedSE" = unname(SE(MonthOccuMod)["psi(Int)"]),
  "psi_PointEstimate" = backTransform(MonthOccuMod, type = "state")@estimate,
  "psi_CI95lower" = psi_CI95[1],
  "psi_CI95upper" = psi_CI95[2],
  "psi_CI50lower" = psi_CI50[1],
  "psi_CI50upper" = psi_CI50[2],
  # p (detection probability)
  "p_TransformedPointEstimate" = unname(coef(MonthOccuMod)["p(Int)"]),
  "p_TransformedSE" = unname(SE(MonthOccuMod)["p(Int)"]),
  "p_PointEstimate" = backTransform(MonthOccuMod, type = "det")@estimate,
  "p_CI95lower" = p_CI95[1],
  "p_CI95upper" = p_CI95[2],
  "p_CI50lower" = p_CI50[1],
  "p_CI50upper" = p_CI50[2]
))

Discretised per week

We run the exact same process. We again start by creating an unmarkedFrame object.

# Convert weekly counts to detection/non-detection.
# NOTE(review): (counts > 1) requires MORE than one detection per week to
# score a 1; use (counts > 0) if "at least one detection" is intended —
# TODO confirm.
umf <- unmarkedFrameOccu(y = (as.matrix(WeekCountMatrix) > 1) * 1)
summary(umf)
unmarkedFrame Object

11 sites
Maximum number of observations per site: 125 
Mean number of observations per site: 69.09 
Sites with at least one detection: 8 

Tabulation of y observations:
   0    1 <NA> 
 711   49  615 

We calculate intuitive starting points for likelihood maximisation, using the same calculations as before.

For the occupancy probability, we use the ratio of sites with at least one detection.

(psi_init <- mean(rowSums(getY(umf), na.rm = TRUE) > 0))
[1] 0.7272727

For the detection probability, we focus on sites in which there was at least one detection and calculate the ratio of sessions with at least one detection.

# Naive detection probability over sites with detections only.
(p_init <- mean(
  getY(umf)[rowSums(getY(umf), na.rm = TRUE) > 0,] > 0, 
  na.rm = T
))
[1] 0.08085809

We run the model.

# Fit the constant BP occupancy model on the weekly data
# (double formula: ~ detection ~ occupancy), timing the fit.
beforetime = Sys.time()
WeekOccuMod <- occu(formula =  ~ 1 ~ 1,
     data = umf,
     method = "Nelder-Mead",
     starts = c(qlogis(psi_init), qlogis(p_init))
)
aftertime = Sys.time()
print(WeekOccuMod)

Call:
occu(formula = ~1 ~ 1, data = umf, starts = c(qlogis(psi_init), 
    qlogis(p_init)), method = "Nelder-Mead")

Occupancy:
 Estimate    SE    z P(>|z|)
     1.19 0.786 1.52   0.129

Detection:
 Estimate    SE     z  P(>|z|)
    -2.45 0.153 -16.1 4.95e-58

AIC: 356.3957 

The model running time is:

And we look at the estimates.

# Back-transform the logit-scale estimates to probabilities
# (both psi and p use a logit link in occu, so plogis() is correct here).
# Estimates
backTransform(WeekOccuMod, type = "state")
Backtransformed linear combination(s) of Occupancy estimate(s)

 Estimate   SE LinComb (Intercept)
    0.767 0.14    1.19           1

Transformation: logistic 
backTransform(WeekOccuMod, type = "det")
Backtransformed linear combination(s) of Detection estimate(s)

 Estimate     SE LinComb (Intercept)
   0.0792 0.0111   -2.45           1

Transformation: logistic 
# 95% Confidence intervals
plogis(confint(WeekOccuMod, type = 'state', method = 'normal'))
             0.025     0.975
psi(Int) 0.4137922 0.9390257
plogis(confint(WeekOccuMod, type = 'det', method = 'normal'))
            0.025     0.975
p(Int) 0.05994302 0.1039822
# 50% Confidence intervals
plogis(confint(WeekOccuMod, type = 'state', method = 'normal', level = 0.50))
              0.25      0.75
psi(Int) 0.6598463 0.8485753
plogis(confint(WeekOccuMod, type = 'det', method = 'normal', level = 0.50))
             0.25       0.75
p(Int) 0.07201254 0.08705714

For the comparison, we store those results in a dataframe.

# Store the BP (week) estimates for the model comparison.
# plogis() is the inverse of the logit link used for both psi and p.
# Each interval is computed once and reused (8 confint() calls reduced to 4).
psi_CI95 <- plogis(confint(WeekOccuMod, type = "state", method = "normal"))
psi_CI50 <- plogis(confint(WeekOccuMod, type = "state", method = "normal", level = 0.50))
p_CI95 <- plogis(confint(WeekOccuMod, type = "det", method = "normal"))
p_CI50 <- plogis(confint(WeekOccuMod, type = "det", method = "normal", level = 0.50))

ModelComparisonDF <- bind_rows(ModelComparisonDF, data.frame(
  "Model" = "BP",
  "Discretisation" = "Week",
  # psi (occupancy probability)
  "psi_TransformedPointEstimate" = unname(coef(WeekOccuMod)["psi(Int)"]),
  "psi_TransformedSE" = unname(SE(WeekOccuMod)["psi(Int)"]),
  "psi_PointEstimate" = backTransform(WeekOccuMod, type = "state")@estimate,
  "psi_CI95lower" = psi_CI95[1],
  "psi_CI95upper" = psi_CI95[2],
  "psi_CI50lower" = psi_CI50[1],
  "psi_CI50upper" = psi_CI50[2],
  # p (detection probability)
  "p_TransformedPointEstimate" = unname(coef(WeekOccuMod)["p(Int)"]),
  "p_TransformedSE" = unname(SE(WeekOccuMod)["p(Int)"]),
  "p_PointEstimate" = backTransform(WeekOccuMod, type = "det")@estimate,
  "p_CI95lower" = p_CI95[1],
  "p_CI95upper" = p_CI95[2],
  "p_CI50lower" = p_CI50[1],
  "p_CI50upper" = p_CI50[2]
))

Discretised per day

We create an unmarkedFrame object.

# Convert daily counts to detection/non-detection.
# NOTE(review): (counts > 1) requires MORE than one detection per day to
# score a 1; use (counts > 0) if "at least one detection" is intended —
# TODO confirm.
umf <- unmarkedFrameOccu(y = (as.matrix(DayCountMatrix) > 1) * 1)
summary(umf)
unmarkedFrame Object

11 sites
Maximum number of observations per site: 881 
Mean number of observations per site: 489.64 
Sites with at least one detection: 8 

Tabulation of y observations:
   0    1 <NA> 
5349   37 4305 

We calculate intuitive starting points for likelihood maximisation, using the same calculations as before.

For the occupancy probability, we use the ratio of sites with at least one detection.

(psi_init <- mean(rowSums(getY(umf), na.rm = TRUE) > 0))
[1] 0.7272727

For the detection probability, we focus on sites in which there was at least one detection and calculate the ratio of sessions with at least one detection.

# Naive detection probability over sites with detections only.
(p_init <- mean(
  getY(umf)[rowSums(getY(umf), na.rm = TRUE) > 0,] > 0, 
  na.rm = T
))
[1] 0.008628731

We run the model.

# Fit the constant BP occupancy model on the daily data
# (double formula: ~ detection ~ occupancy), timing the fit.
beforetime = Sys.time()
DayOccuMod <- occu(formula =  ~ 1 ~ 1,
     data = umf,
     method = "Nelder-Mead",
     starts = c(qlogis(psi_init), qlogis(p_init))
)
aftertime = Sys.time()
print(DayOccuMod)

Call:
occu(formula = ~1 ~ 1, data = umf, starts = c(qlogis(psi_init), 
    qlogis(p_init)), method = "Nelder-Mead")

Occupancy:
 Estimate    SE    z P(>|z|)
     1.39 0.895 1.55   0.121

Detection:
 Estimate    SE     z  P(>|z|)
    -4.79 0.172 -27.9 3.2e-171

AIC: 440.5986 

The model running time is:

And we look at the estimates.

# Back-transform the logit-scale estimates to probabilities
# (both psi and p use a logit link in occu, so plogis() is correct here).
# Estimates
backTransform(DayOccuMod, type = "state")
Backtransformed linear combination(s) of Occupancy estimate(s)

 Estimate    SE LinComb (Intercept)
      0.8 0.143    1.39           1

Transformation: logistic 
backTransform(DayOccuMod, type = "det")
Backtransformed linear combination(s) of Detection estimate(s)

 Estimate      SE LinComb (Intercept)
  0.00828 0.00141   -4.79           1

Transformation: logistic 
# 95% Confidence intervals
plogis(confint(DayOccuMod, type = 'state', method = 'normal'))
             0.025     0.975
psi(Int) 0.4097286 0.9586336
plogis(confint(DayOccuMod, type = 'det', method = 'normal'))
             0.025     0.975
p(Int) 0.005931632 0.0115549
# 50% Confidence intervals
plogis(confint(DayOccuMod, type = 'state', method = 'normal', level = 0.50))
              0.25      0.75
psi(Int) 0.6868307 0.8800189
plogis(confint(DayOccuMod, type = 'det', method = 'normal', level = 0.50))
              0.25        0.75
p(Int) 0.007384375 0.009289357

For the comparison, we store those results in a dataframe.

# Store the BP (day) estimates for the model comparison.
# plogis() is the inverse of the logit link used for both psi and p.
# Each interval is computed once and reused (8 confint() calls reduced to 4).
psi_CI95 <- plogis(confint(DayOccuMod, type = "state", method = "normal"))
psi_CI50 <- plogis(confint(DayOccuMod, type = "state", method = "normal", level = 0.50))
p_CI95 <- plogis(confint(DayOccuMod, type = "det", method = "normal"))
p_CI50 <- plogis(confint(DayOccuMod, type = "det", method = "normal", level = 0.50))

ModelComparisonDF <- bind_rows(ModelComparisonDF, data.frame(
  "Model" = "BP",
  "Discretisation" = "Day",
  # psi (occupancy probability)
  "psi_TransformedPointEstimate" = unname(coef(DayOccuMod)["psi(Int)"]),
  "psi_TransformedSE" = unname(SE(DayOccuMod)["psi(Int)"]),
  "psi_PointEstimate" = backTransform(DayOccuMod, type = "state")@estimate,
  "psi_CI95lower" = psi_CI95[1],
  "psi_CI95upper" = psi_CI95[2],
  "psi_CI50lower" = psi_CI50[1],
  "psi_CI50upper" = psi_CI50[2],
  # p (detection probability)
  "p_TransformedPointEstimate" = unname(coef(DayOccuMod)["p(Int)"]),
  "p_TransformedSE" = unname(SE(DayOccuMod)["p(Int)"]),
  "p_PointEstimate" = backTransform(DayOccuMod, type = "det")@estimate,
  "p_CI95lower" = p_CI95[1],
  "p_CI95upper" = p_CI95[2],
  "p_CI50lower" = p_CI50[1],
  "p_CI50upper" = p_CI50[2]
))

COP with unmarked::occuCOP

Discretised per month

We create an unmarkedFrame object. With COP, we have L, the length of the observation periods — here the duration of the sessions — which is useful to better integrate months of different durations into the estimation of a daily detection rate.

To make this comparable to the simulation study, we dropped the data of incomplete sessions, like we did for the BP model. However, because we can stipulate to the model that different sessions have different durations, we probably would have had a better estimation if we kept all data.

# Counts per site x month, with L = session length in days so the COP model
# estimates a daily detection rate despite months of unequal length.
umf = unmarkedFrameOccuCOP(
  y = as.matrix(MonthCountMatrix),
  L = matrix(
    # Each month's duration is repeated once per site (each = nrow), filling
    # the matrix column by column so column j is constant = length of month j.
    # The last element of MonthSessions is dropped — presumably it is the
    # closing boundary of the final session (one more boundary than
    # sessions) — TODO confirm.
    data = rep(days_in_month(MonthSessions[-length(MonthSessions)]), each =
                 nrow(MonthCountMatrix)),
    nrow = nrow(MonthCountMatrix),
    ncol = ncol(MonthCountMatrix),
    dimnames = dimnames(MonthCountMatrix)
  )
)
summary(umf)
unmarkedFrameOccuCOP Object

11 sites
Maximum number of sampling occasions per site: 29 
Mean number of sampling occasions per site: 15.36 
Sites with at least one detection: 9 

Tabulation of y observations:
   0    1    2    3    4    5    6    7    8   14 <NA> 
  90   28   31    3    8    1    4    1    2    1  150 

Tabulation of sampling occasions length:
 28  30  31 
 33  99 187 

We calculate intuitive starting points for likelihood maximisation.

For the occupancy probability, we once again use the ratio of sites with at least one detection.

(psi_init <- mean(rowSums(getY(umf), na.rm = TRUE) > 0))
[1] 0.8181818

For the detection rate (per day), we focus on sites in which there was at least one detection and calculate the average number of detections divided by the number of days in the deployment.

# Naive daily detection rate: counts divided by session lengths,
# averaged over occupied sites only.
(lambda_init <- mean((getY(umf) / getL(umf))[rowSums(getY(umf), na.rm = TRUE) > 0, ],
                     na.rm = T))
[1] 0.04573462

We run the model.

# Fit the constant COP model; starting values are passed on the transformed
# scales (logit for psi, log for the daily detection rate lambda).
beforetime = Sys.time()
MonthOccuCOPMod <- occuCOP(
  data = umf,
  psiformula =  ~ 1,
  lambdaformula =  ~ 1,
  method = "Nelder-Mead",
  psistarts = qlogis(psi_init),
  lambdastarts = log(lambda_init)
)
aftertime = Sys.time()
print(MonthOccuCOPMod)

Call:
occuCOP(data = umf, psiformula = ~1, lambdaformula = ~1, psistarts = qlogis(psi_init), 
    lambdastarts = log(lambda_init), method = "Nelder-Mead")

Occupancy probability:
 Estimate    SE    z P(>|z|)
      1.5 0.782 1.92  0.0543

Detection rate:
 Estimate     SE     z P(>|z|)
    -3.09 0.0713 -43.4       0

AIC: 86.80169 

The model running time is:

And we look at the estimates.

# Back-transform the estimates: psi uses a logit link, lambda a log link
# (see "Transformation: exp" below).
# Estimates
backTransform(MonthOccuCOPMod, type = "psi")
Backtransformed linear combination(s) of Occupancy probability estimate(s)

 Estimate    SE LinComb (Intercept)
    0.818 0.116     1.5           1

Transformation: logistic 
backTransform(MonthOccuCOPMod, type = "lambda")
Backtransformed linear combination(s) of Detection rate estimate(s)

 Estimate      SE LinComb (Intercept)
   0.0456 0.00325   -3.09           1

Transformation: exp 
# 95% Confidence intervals
plogis(confint(MonthOccuCOPMod, type = 'psi', method = 'normal'))
           0.025     0.975
psi(Int) 0.49299 0.9542132
# NOTE(review): lambda is estimated on the log scale ("Transformation: exp"
# above), so its confidence bounds should be back-transformed with exp(),
# not plogis(); the lambda intervals printed below are slightly too low.
plogis(confint(MonthOccuCOPMod, type = 'lambda', method = 'normal'))
                 0.025      0.975
lambda(Int) 0.03811187 0.04978027
# 50% Confidence intervals
plogis(confint(MonthOccuCOPMod, type = 'psi', method = 'normal', level = 0.50))
              0.25      0.75
psi(Int) 0.7265227 0.8840954
plogis(confint(MonthOccuCOPMod, type = 'lambda', method = 'normal', level = 0.50))
                 0.25       0.75
lambda(Int) 0.0416153 0.04562218

For the comparison, we store those results in a dataframe.

# Store the COP (month) estimates. psi is on the logit scale (inverse link
# plogis); lambda is on the log scale (inverse link exp) — see the
# "Transformation: exp" back-transform above. Each interval is computed once
# and reused.
psi_CI95 <- plogis(confint(MonthOccuCOPMod, type = "psi", method = "normal"))
psi_CI50 <- plogis(confint(MonthOccuCOPMod, type = "psi", method = "normal", level = 0.50))
lambda_CI95 <- exp(confint(MonthOccuCOPMod, type = "lambda", method = "normal"))
# BUGFIX: the 50% CI for lambda was back-transformed with plogis(); exp() is
# the correct inverse of the log link (consistent with the 95% CI above and
# with the IPP / 2-MMPP sections).
lambda_CI50 <- exp(confint(MonthOccuCOPMod, type = "lambda", method = "normal", level = 0.50))

ModelComparisonDF <- bind_rows(ModelComparisonDF, data.frame(
  "Model" = "COP",
  "Discretisation" = "Month",
  # psi (occupancy probability, logit link)
  "psi_TransformedPointEstimate" = unname(coef(MonthOccuCOPMod)["psi(Int)"]),
  "psi_TransformedSE" = unname(SE(MonthOccuCOPMod)["psi(Int)"]),
  "psi_PointEstimate" = backTransform(MonthOccuCOPMod, type = "psi")@estimate,
  "psi_CI95lower" = psi_CI95[1],
  "psi_CI95upper" = psi_CI95[2],
  "psi_CI50lower" = psi_CI50[1],
  "psi_CI50upper" = psi_CI50[2],
  # lambda (daily detection rate, log link)
  "lambda_TransformedPointEstimate" = unname(coef(MonthOccuCOPMod)["lambda(Int)"]),
  "lambda_TransformedSE" = unname(SE(MonthOccuCOPMod)["lambda(Int)"]),
  "lambda_PointEstimate" = backTransform(MonthOccuCOPMod, type = "lambda")@estimate,
  "lambda_CI95lower" = lambda_CI95[1],
  "lambda_CI95upper" = lambda_CI95[2],
  "lambda_CI50lower" = lambda_CI50[1],
  "lambda_CI50upper" = lambda_CI50[2]
))

Discretised per week

We create an unmarkedFrame object.

# Weekly counts; every session lasts exactly 7 days, so L is constant.
umf = unmarkedFrameOccuCOP(
  y = as.matrix(WeekCountMatrix),
  L = matrix(
    7,
    nrow = nrow(WeekCountMatrix),
    ncol = ncol(WeekCountMatrix),
    dimnames = dimnames(WeekCountMatrix)
  )
)
summary(umf)
unmarkedFrameOccuCOP Object

11 sites
Maximum number of sampling occasions per site: 125 
Mean number of sampling occasions per site: 69.09 
Sites with at least one detection: 9 

Tabulation of y observations:
   0    1    2    3    4    5    6    8   14 <NA> 
 649   62   35    3    7    1    1    1    1  615 

Tabulation of sampling occasions length:
   7 
1375 

We calculate intuitive starting points for likelihood maximisation.

For the occupancy probability, we once again use the ratio of sites with at least one detection.

(psi_init <- mean(rowSums(getY(umf), na.rm = TRUE) > 0))
[1] 0.8181818

For the detection rate (per day), we focus on sites in which there was at least one detection and calculate the average number of detections divided by the number of days in the deployment.

# Naive daily detection rate over occupied sites only.
(lambda_init <- mean((getY(umf) / getL(umf))[rowSums(getY(umf), na.rm = TRUE) > 0, ],
                     na.rm = T))
[1] 0.0457324

We run the model.

# Fit the constant COP model on the weekly data (starting values on the
# transformed scales: logit for psi, log for lambda).
beforetime = Sys.time()
WeekOccuCOPMod <- occuCOP(
  data = umf,
  psiformula =  ~ 1,
  lambdaformula =  ~ 1,
  method = "Nelder-Mead",
  psistarts = qlogis(psi_init),
  lambdastarts = log(lambda_init)
)
aftertime = Sys.time()
print(WeekOccuCOPMod)

Call:
occuCOP(data = umf, psiformula = ~1, lambdaformula = ~1, psistarts = qlogis(psi_init), 
    lambdastarts = log(lambda_init), method = "Nelder-Mead")

Occupancy probability:
 Estimate    SE    z P(>|z|)
      1.5 0.782 1.92  0.0544

Detection rate:
 Estimate     SE     z P(>|z|)
    -3.08 0.0704 -43.8       0

AIC: 88.1427 

The model running time is:

And we look at the estimates.

# Back-transform the estimates: psi uses a logit link, lambda a log link.
# Estimates
backTransform(WeekOccuCOPMod, type = "psi")
Backtransformed linear combination(s) of Occupancy probability estimate(s)

 Estimate    SE LinComb (Intercept)
    0.818 0.116     1.5           1

Transformation: logistic 
backTransform(WeekOccuCOPMod, type = "lambda")
Backtransformed linear combination(s) of Detection rate estimate(s)

 Estimate      SE LinComb (Intercept)
   0.0457 0.00322   -3.08           1

Transformation: exp 
# 95% Confidence intervals
plogis(confint(WeekOccuCOPMod, type = 'psi', method = 'normal'))
            0.025     0.975
psi(Int) 0.492971 0.9541862
# NOTE(review): lambda is log-scale ("Transformation: exp" above), so these
# bounds should use exp(), not plogis(); the printed values are slightly low.
plogis(confint(WeekOccuCOPMod, type = 'lambda', method = 'normal'))
                 0.025      0.975
lambda(Int) 0.03831473 0.04987641
# 50% Confidence intervals
plogis(confint(WeekOccuCOPMod, type = 'psi', method = 'normal', level = 0.50))
              0.25      0.75
psi(Int) 0.7264723 0.8840503
plogis(confint(WeekOccuCOPMod, type = 'lambda', method = 'normal', level = 0.50))
                  0.25       0.75
lambda(Int) 0.04179016 0.04576062

For the comparison, we store those results in a dataframe.

# Store the COP (week) estimates. psi: logit link (plogis); lambda: log link
# (exp). Each interval is computed once and reused.
psi_CI95 <- plogis(confint(WeekOccuCOPMod, type = "psi", method = "normal"))
psi_CI50 <- plogis(confint(WeekOccuCOPMod, type = "psi", method = "normal", level = 0.50))
lambda_CI95 <- exp(confint(WeekOccuCOPMod, type = "lambda", method = "normal"))
# BUGFIX: the 50% CI for lambda was back-transformed with plogis(); exp() is
# the correct inverse of the log link (consistent with the 95% CI above).
lambda_CI50 <- exp(confint(WeekOccuCOPMod, type = "lambda", method = "normal", level = 0.50))

ModelComparisonDF <- bind_rows(ModelComparisonDF, data.frame(
  "Model" = "COP",
  "Discretisation" = "Week",
  # psi (occupancy probability, logit link)
  "psi_TransformedPointEstimate" = unname(coef(WeekOccuCOPMod)["psi(Int)"]),
  "psi_TransformedSE" = unname(SE(WeekOccuCOPMod)["psi(Int)"]),
  "psi_PointEstimate" = backTransform(WeekOccuCOPMod, type = "psi")@estimate,
  "psi_CI95lower" = psi_CI95[1],
  "psi_CI95upper" = psi_CI95[2],
  "psi_CI50lower" = psi_CI50[1],
  "psi_CI50upper" = psi_CI50[2],
  # lambda (daily detection rate, log link)
  "lambda_TransformedPointEstimate" = unname(coef(WeekOccuCOPMod)["lambda(Int)"]),
  "lambda_TransformedSE" = unname(SE(WeekOccuCOPMod)["lambda(Int)"]),
  "lambda_PointEstimate" = backTransform(WeekOccuCOPMod, type = "lambda")@estimate,
  "lambda_CI95lower" = lambda_CI95[1],
  "lambda_CI95upper" = lambda_CI95[2],
  "lambda_CI50lower" = lambda_CI50[1],
  "lambda_CI50upper" = lambda_CI50[2]
))

Discretised per day

We create an unmarkedFrame object.

# Daily counts; every session lasts 1 day, so L is constant at 1 and the
# estimated lambda is directly a daily rate.
umf = unmarkedFrameOccuCOP(
  y = as.matrix(DayCountMatrix),
  L = matrix(
    1,
    nrow = nrow(DayCountMatrix),
    ncol = ncol(DayCountMatrix),
    dimnames = dimnames(DayCountMatrix)
  )
)
summary(umf)
unmarkedFrameOccuCOP Object

11 sites
Maximum number of sampling occasions per site: 881 
Mean number of sampling occasions per site: 489.64 
Sites with at least one detection: 9 

Tabulation of y observations:
   0    1    2    3    4    6    8 <NA> 
5239  110   28    5    2    1    1 4305 

Tabulation of sampling occasions length:
   1 
9691 

We calculate intuitive starting points for likelihood maximisation.

For the occupancy probability, we once again use the ratio of sites with at least one detection.

(psi_init <- mean(rowSums(getY(umf), na.rm = TRUE) > 0))
[1] 0.8181818

For the detection rate (per day), we focus on sites in which there was at least one detection and calculate the average number of detections divided by the number of days in the deployment.

# Naive daily detection rate over occupied sites only.
(lambda_init <- mean((getY(umf) / getL(umf))[rowSums(getY(umf), na.rm = TRUE) > 0, ],
                     na.rm = T))
[1] 0.04540371

We run the model.

# Fit the constant COP model on the daily data (starting values on the
# transformed scales: logit for psi, log for lambda).
beforetime = Sys.time()
DayOccuCOPMod <- occuCOP(
  data = umf,
  psiformula =  ~ 1,
  lambdaformula =  ~ 1,
  method = "Nelder-Mead",
  psistarts = qlogis(psi_init),
  lambdastarts = log(lambda_init)
)
aftertime = Sys.time()
print(DayOccuCOPMod)

Call:
occuCOP(data = umf, psiformula = ~1, lambdaformula = ~1, psistarts = qlogis(psi_init), 
    lambdastarts = log(lambda_init), method = "Nelder-Mead")

Occupancy probability:
 Estimate    SE    z P(>|z|)
      1.5 0.782 1.92  0.0544

Detection rate:
 Estimate     SE     z P(>|z|)
    -3.09 0.0702 -44.1       0

AIC: 88.21179 

The model running time is:

And we look at the estimates.

# Back-transform the estimates: psi uses a logit link, lambda a log link.
# Estimates
backTransform(DayOccuCOPMod, type = "psi")
Backtransformed linear combination(s) of Occupancy probability estimate(s)

 Estimate    SE LinComb (Intercept)
    0.818 0.116     1.5           1

Transformation: logistic 
backTransform(DayOccuCOPMod, type = "lambda")
Backtransformed linear combination(s) of Detection rate estimate(s)

 Estimate      SE LinComb (Intercept)
   0.0454 0.00319   -3.09           1

Transformation: exp 
# 95% Confidence intervals
plogis(confint(DayOccuCOPMod, type = 'psi', method = 'normal'))
             0.025     0.975
psi(Int) 0.4929715 0.9541861
# NOTE(review): lambda is log-scale ("Transformation: exp" above), so these
# bounds should use exp(), not plogis(); the printed values are slightly low.
plogis(confint(DayOccuCOPMod, type = 'lambda', method = 'normal'))
                 0.025      0.975
lambda(Int) 0.03806229 0.04951968
# 50% Confidence intervals
plogis(confint(DayOccuCOPMod, type = 'psi', method = 'normal', level = 0.50))
              0.25      0.75
psi(Int) 0.7264724 0.8840502
plogis(confint(DayOccuCOPMod, type = 'lambda', method = 'normal', level = 0.50))
                  0.25      0.75
lambda(Int) 0.04150693 0.0454416

For the comparison, we store those results in a dataframe.

# Store the COP (day) estimates. psi: logit link (plogis); lambda: log link
# (exp). Each interval is computed once and reused.
psi_CI95 <- plogis(confint(DayOccuCOPMod, type = "psi", method = "normal"))
psi_CI50 <- plogis(confint(DayOccuCOPMod, type = "psi", method = "normal", level = 0.50))
lambda_CI95 <- exp(confint(DayOccuCOPMod, type = "lambda", method = "normal"))
# BUGFIX: the 50% CI for lambda was back-transformed with plogis(); exp() is
# the correct inverse of the log link (consistent with the 95% CI above).
lambda_CI50 <- exp(confint(DayOccuCOPMod, type = "lambda", method = "normal", level = 0.50))

ModelComparisonDF <- bind_rows(ModelComparisonDF, data.frame(
  "Model" = "COP",
  "Discretisation" = "Day",
  # psi (occupancy probability, logit link)
  "psi_TransformedPointEstimate" = unname(coef(DayOccuCOPMod)["psi(Int)"]),
  "psi_TransformedSE" = unname(SE(DayOccuCOPMod)["psi(Int)"]),
  "psi_PointEstimate" = backTransform(DayOccuCOPMod, type = "psi")@estimate,
  "psi_CI95lower" = psi_CI95[1],
  "psi_CI95upper" = psi_CI95[2],
  "psi_CI50lower" = psi_CI50[1],
  "psi_CI50upper" = psi_CI50[2],
  # lambda (daily detection rate, log link)
  "lambda_TransformedPointEstimate" = unname(coef(DayOccuCOPMod)["lambda(Int)"]),
  "lambda_TransformedSE" = unname(SE(DayOccuCOPMod)["lambda(Int)"]),
  "lambda_PointEstimate" = backTransform(DayOccuCOPMod, type = "lambda")@estimate,
  "lambda_CI95lower" = lambda_CI95[1],
  "lambda_CI95upper" = lambda_CI95[2],
  "lambda_CI50lower" = lambda_CI50[1],
  "lambda_CI50upper" = lambda_CI50[2]
))

PP

We calculate intuitive starting points for likelihood maximisation.

# Number of detections per site: detection_times is a list with one element
# per site, whose first sub-element holds the vector of detection times.
# vapply() instead of sapply(): guarantees an integer vector even for an
# empty input (sapply() would silently return a list).
NbDetecPerSite <- vapply(detection_times, function(x) length(x[[1]]), integer(1))

# Duration of the monitoring period per site, flattened from the per-site list
# (presumably in days, matching the daily detection rate — TODO confirm)
MonitoringDurationPerSite <- unlist(list_T_ij)

For the occupancy probability, we once again use the ratio of sites with at least one detection.

(psi_init <- mean(NbDetecPerSite > 0))
[1] 0.8181818

For the detection rate (per day), we focus on sites in which there was at least one detection and calculate the average number of detections per day of monitoring.

(lambda_init <- mean((NbDetecPerSite / MonitoringDurationPerSite)[NbDetecPerSite > 0]))
[1] 0.04315439

We run the model.

# Maximise the PP likelihood via the negative log-likelihood sourced from
# ./utils/comparison_of_occupancy_models.R. hessian = T requests the
# numerical Hessian at the optimum, used below for standard errors.
# Style note: prefer TRUE over T (T can be reassigned).
beforetime = Sys.time()
fitted_PP <- optim(
  # Initial parameters (transformed scales: logit for psi, log for lambda)
  par = c(
    'psi' = logit(psi_init),
    'lambda' = log(lambda_init)
  ),
  # Function to optimize
  fn = get_PP_neg_loglikelihood,
  # Optim parameters
  method = "Nelder-Mead",
  # Other parameters forwarded to get_PP_neg_loglikelihood via ...
  detection_times = detection_times,
  NbSites = NbSites,
  list_T_ij = list_T_ij,
  list_R_i = list_R_i,
  hessian = T
)
aftertime = Sys.time()
print(fitted_PP)
$par
      psi    lambda 
 1.505713 -3.093956 

$value
[1] 836.3008

$counts
function gradient 
      35       NA 

$convergence
[1] 0

$message
NULL

$hessian
                psi       lambda
psi    1.6346218104 4.588117e-04
lambda 0.0004588117 2.030074e+02

The model running time is:

We calculate the 95% confidence interval.

# Invert the Hessian of the negative log-likelihood to get the
# variance-covariance matrix of the transformed parameters.
fisher_info <- solve(fitted_PP$hessian)
if (any(diag(fisher_info) < 0)) {
  # Negative variances can occur when the optimum is a saddle point or the
  # Hessian is numerically ill-conditioned; adding 0i makes sqrt() return
  # complex values instead of NaN-with-warning, keeping the problem visible.
  se <- sqrt(diag(fisher_info) + 0i)
} else{
  se <- sqrt(diag(fisher_info))
}

# 95% CI transformed (logit for psi, log for lambda)
upper95CI_fitted_PP <- fitted_PP$par + qnorm(.975) * se
lower95CI_fitted_PP <- fitted_PP$par - qnorm(.975) * se

# 50% CI transformed (logit for psi, log for lambda)
upper50CI_fitted_PP <- fitted_PP$par + qnorm(.75) * se
lower50CI_fitted_PP <- fitted_PP$par - qnorm(.75) * se

And we look at our estimates and add them to our result dataframe.

# Gather the PP estimates. psi is on the logit scale (back-transform with
# plogis); lambda on the log scale (back-transform with exp).
resDF <- data.frame(
  "Model" = "PP",
  "Discretisation" = "No discretisation",
  # psi
  "psi_TransformedPointEstimate" = unname(fitted_PP$par['psi']),
  "psi_TransformedSE" = unname(se["psi"]),
  "psi_PointEstimate" = plogis(unname(fitted_PP$par['psi'])),
  "psi_CI95lower" = plogis(unname(lower95CI_fitted_PP['psi'])),
  "psi_CI95upper" = plogis(unname(upper95CI_fitted_PP['psi'])),
  "psi_CI50lower" = plogis(unname(lower50CI_fitted_PP['psi'])),
  "psi_CI50upper" = plogis(unname(upper50CI_fitted_PP['psi'])),
  # lambda
  "lambda_TransformedPointEstimate" = unname(fitted_PP$par['lambda']),
  "lambda_TransformedSE" = unname(se["lambda"]),
  "lambda_PointEstimate" = exp(unname(fitted_PP$par['lambda'])),
  "lambda_CI95lower" = exp(unname(lower95CI_fitted_PP['lambda'])),
  "lambda_CI95upper" = exp(unname(upper95CI_fitted_PP['lambda'])),
  # BUGFIX: these 50% bounds were back-transformed with plogis(); lambda is
  # log-scale, so exp() is correct (as used for the 95% CI here and
  # throughout the IPP section). Printed values in this document predate
  # the fix — re-render to refresh them.
  "lambda_CI50lower" = exp(unname(lower50CI_fitted_PP['lambda'])),
  "lambda_CI50upper" = exp(unname(upper50CI_fitted_PP['lambda']))
)
cat(paste(colnames(resDF), resDF[1, ], sep = ": ", collapse = "\n"))
Model: PP
Discretisation: No discretisation
psi_TransformedPointEstimate: 1.50571256852469
psi_TransformedSE: 0.782152351788616
psi_PointEstimate: 0.818424940325439
psi_CI95lower: 0.493180954971223
psi_CI95upper: 0.954292289691418
psi_CI50lower: 0.726742734086482
psi_CI50upper: 0.88424582170101
lambda_TransformedPointEstimate: -3.09395583060835
lambda_TransformedSE: 0.0701849568579153
lambda_PointEstimate: 0.0453223119325056
lambda_CI95lower: 0.0394975822303617
lambda_CI95upper: 0.0520060176576667
lambda_CI50lower: 0.0414356583550963
lambda_CI50upper: 0.0453637609703149
# Append the PP results to the comparison table.
ModelComparisonDF <- bind_rows(
  ModelComparisonDF,
  resDF
)

IPP

We calculate intuitive starting points for likelihood maximisation.

# Number of detections per site (recomputed here so the IPP section is
# self-contained). vapply() instead of sapply(): guarantees an integer
# vector even for an empty input (sapply() would silently return a list).
NbDetecPerSite <- vapply(detection_times, function(x) length(x[[1]]), integer(1))

# Duration of the monitoring period per site, flattened from the per-site list
MonitoringDurationPerSite <- unlist(list_T_ij)

For the occupancy probability, we once again use the ratio of sites with at least one detection.

(psi_init <- mean(NbDetecPerSite > 0))
[1] 0.8181818

For the switching rates (\(\mu_{12}\), \(\mu_{21}\)), starting points are harder to estimate, although it could be done through a visual analysis for example. We will choose starting points at 1 (i.e. 1 switch per day on average).

# Initial guess: one regime switch per day in each direction.
# mu_12 = mu_21 implies the process spends half its time in state 2,
# which is what the lambda_2 starting value below relies on.
mu_12_init = 1
mu_21_init = 1

For the daily detection rate \(\lambda_2\), we focus on sites in which there was at least one detection and calculate the average number of detections in state 2. To get this information, we need to divide the average number of detections per day by the proportion of time spent in state 2. With the IPP, we only have detections when we are in state 2. Because we chose \(\mu_{12}=\mu_{21}\) for our initial parameters, the system is in state 2 for half the time of the deployment, so even if this is probably not the best starting point, we will keep this logic.

(lambda2_init <- mean((NbDetecPerSite / MonitoringDurationPerSite)[NbDetecPerSite > 0])/2)
[1] 0.0215772

We run the model.

# Maximise the IPP likelihood (sourced custom function); hessian = T
# requests the numerical Hessian at the optimum for standard errors.
# Style note: prefer TRUE over T (T can be reassigned).
beforetime = Sys.time()
fitted_IPP <- optim(
  # Initial parameters (logit scale for psi, log scale for the rates)
  par = c(
    'psi' = logit(psi_init),
    'lambda_2' = log(lambda2_init),
    'mu_12' = log(mu_12_init),
    'mu_21' = log(mu_21_init)
  ),
  # Function to optimize
  fn = get_IPP_neg_loglikelihood,
  # Optim parameters
  method = "Nelder-Mead",
  # Other parameters forwarded to get_IPP_neg_loglikelihood via ...
  detection_times = detection_times,
  NbSites = NbSites,
  list_T_ij = list_T_ij,
  list_R_i = list_R_i,
  hessian = T
)
aftertime = Sys.time()
print(fitted_IPP)
$par
      psi  lambda_2     mu_12     mu_21 
 1.502027  2.654741 -1.819536  3.926011 

$value
[1] 656.1936

$counts
function gradient 
     237       NA 

$convergence
[1] 0

$message
NULL

$hessian
                   psi      lambda_2         mu_12         mu_21
psi       1.6384814217  1.118394e-04  1.427338e-04 -1.115410e-04
lambda_2  0.0001118394  1.333160e+02  1.232471e+02 -1.260003e+02
mu_12     0.0001427338  1.232471e+02  1.572448e+02 -1.257209e+02
mu_21    -0.0001115410 -1.260003e+02 -1.257209e+02  1.473917e+02

The model running time is:

We calculate the 95% confidence interval.

# Invert the Hessian of the negative log-likelihood to get the
# variance-covariance matrix of the transformed parameters.
fisher_info <- solve(fitted_IPP$hessian)
if (any(diag(fisher_info) < 0)) {
  # Negative variances signal a saddle point / ill-conditioned Hessian;
  # adding 0i makes sqrt() return complex values instead of NaN, keeping
  # the problem visible downstream.
  se <- sqrt(diag(fisher_info) + 0i)
} else{
  se <- sqrt(diag(fisher_info))
}

# 95% CI transformed (logit for psi, log for lambda)
upper95CI_fitted_IPP <- fitted_IPP$par + qnorm(.975) * se
lower95CI_fitted_IPP <- fitted_IPP$par - qnorm(.975) * se

# 50% CI transformed (logit for psi, log for lambda)
upper50CI_fitted_IPP <- fitted_IPP$par + qnorm(.75) * se
lower50CI_fitted_IPP <- fitted_IPP$par - qnorm(.75) * se

And we look at our estimates and add them to our result dataframe.

# Gather the IPP estimates. psi is logit-scale (inverse link plogis);
# lambda_2, mu_12 and mu_21 are log-scale (inverse link exp) — note exp()
# is correctly used for ALL their bounds here, including the 50% CI.
resDF <- data.frame(
  "Model" = "IPP",
  "Discretisation" = "No discretisation",
  # psi
  "psi_TransformedPointEstimate" = unname(fitted_IPP$par['psi']),
  "psi_TransformedSE" = unname(se["psi"]),
  "psi_PointEstimate" = plogis(unname(fitted_IPP$par['psi'])),
  "psi_CI95lower" = plogis(unname(lower95CI_fitted_IPP['psi'])),
  "psi_CI95upper" = plogis(unname(upper95CI_fitted_IPP['psi'])),
  "psi_CI50lower" = plogis(unname(lower50CI_fitted_IPP['psi'])),
  "psi_CI50upper" = plogis(unname(upper50CI_fitted_IPP['psi'])),
  # lambda2 (detection rate while in state 2)
  "lambda2_TransformedPointEstimate" = unname(fitted_IPP$par['lambda_2']),
  "lambda2_TransformedSE" = unname(se["lambda_2"]),
  "lambda2_PointEstimate" = exp(unname(fitted_IPP$par['lambda_2'])),
  "lambda2_CI95lower" = exp(unname(lower95CI_fitted_IPP['lambda_2'])),
  "lambda2_CI95upper" = exp(unname(upper95CI_fitted_IPP['lambda_2'])),
  "lambda2_CI50lower" = exp(unname(lower50CI_fitted_IPP['lambda_2'])),
  "lambda2_CI50upper" = exp(unname(upper50CI_fitted_IPP['lambda_2'])),
  # mu12 (switching rate state 1 -> 2)
  "mu12_TransformedPointEstimate" = unname(fitted_IPP$par['mu_12']),
  "mu12_TransformedSE" = unname(se["mu_12"]),
  "mu12_PointEstimate" = exp(unname(fitted_IPP$par['mu_12'])),
  "mu12_CI95lower" = exp(unname(lower95CI_fitted_IPP['mu_12'])),
  "mu12_CI95upper" = exp(unname(upper95CI_fitted_IPP['mu_12'])),
  "mu12_CI50lower" = exp(unname(lower50CI_fitted_IPP['mu_12'])),
  "mu12_CI50upper" = exp(unname(upper50CI_fitted_IPP['mu_12'])),
  # mu21 (switching rate state 2 -> 1)
  "mu21_TransformedPointEstimate" = unname(fitted_IPP$par['mu_21']),
  "mu21_TransformedSE" = unname(se["mu_21"]),
  "mu21_PointEstimate" = exp(unname(fitted_IPP$par['mu_21'])),
  "mu21_CI95lower" = exp(unname(lower95CI_fitted_IPP['mu_21'])),
  "mu21_CI95upper" = exp(unname(upper95CI_fitted_IPP['mu_21'])),
  "mu21_CI50lower" = exp(unname(lower50CI_fitted_IPP['mu_21'])),
  "mu21_CI50upper" = exp(unname(upper50CI_fitted_IPP['mu_21']))
)
cat(paste(colnames(resDF), resDF[1, ], sep = ": ", collapse = "\n"))
Model: IPP
Discretisation: No discretisation
psi_TransformedPointEstimate: 1.50202713555221
psi_TransformedSE: 0.781230588320164
psi_PointEstimate: 0.817876621654525
psi_CI95lower: 0.492711346046314
psi_CI95upper: 0.954052135773875
psi_CI50lower: 0.726133895808135
psi_CI50upper: 0.883804232267414
lambda2_TransformedPointEstimate: 2.65474069705417
lambda2_TransformedSE: 0.220176949186189
lambda2_PointEstimate: 14.2212979567295
lambda2_CI95lower: 9.23685820217277
lambda2_CI95upper: 21.8954660932768
lambda2_CI50lower: 12.2586719595043
lambda2_CI50upper: 16.4981423960265
mu12_TransformedPointEstimate: -1.81953578851366
mu12_TransformedSE: 0.157540140021699
mu12_PointEstimate: 0.1621009826089
mu12_CI95lower: 0.119038442826728
mu12_CI95upper: 0.22074153474117
mu12_CI50lower: 0.145759831977842
mu12_CI50upper: 0.180274141416172
mu21_TransformedPointEstimate: 3.92601134673108
mu21_TransformedSE: 0.194862933899288
mu21_PointEstimate: 50.7043317970344
mu21_CI95lower: 34.6080742396373
mu21_CI95upper: 74.2869783849231
mu21_CI50lower: 44.4594836706484
mu21_CI50upper: 57.8263409901236
# Append the IPP results to the comparison table.
ModelComparisonDF <- bind_rows(
  ModelComparisonDF,
  resDF
)

2-MMPP

We will use the same starting points as we did with IPP. We just have to add an initial parameter for the detection rate in state 1 \(\lambda_1\). We’ll choose one close to 0.

lambda1_init = 0.01

We run the model.

# Maximise the 2-MMPP likelihood (sourced custom function); hessian = T
# requests the numerical Hessian at the optimum for standard errors.
# Style note: prefer TRUE over T (T can be reassigned).
beforetime = Sys.time()
fitted_2MMPP <- optim(
  # Initial parameters (logit scale for psi, log scale for the rates)
  par = c(
    'psi' = logit(psi_init),
    'lambda_1' = log(lambda1_init),
    'lambda_2' = log(lambda2_init),
    'mu_12' = log(mu_12_init),
    'mu_21' = log(mu_21_init)
  ),
  # Function to optimize
  fn = get_2MMPP_neg_loglikelihood,
  # Optim parameters
  method = "Nelder-Mead",
  # Other parameters forwarded to get_2MMPP_neg_loglikelihood via ...
  detection_times = detection_times,
  NbSites = NbSites,
  list_T_ij = list_T_ij,
  list_R_i = list_R_i,
  hessian = T
)
aftertime = Sys.time()
print(fitted_2MMPP)
$par
      psi  lambda_1  lambda_2     mu_12     mu_21 
 1.504410 -4.782945  2.870407 -2.290886  3.875047 

$value
[1] 655.8123

$counts
function gradient 
     348       NA 

$convergence
[1] 0

$message
NULL

$hessian
                   psi      lambda_1      lambda_2         mu_12         mu_21
psi       1.635999e+00  3.372236e-05  8.009238e-05  1.091252e-04 -7.990764e-05
lambda_1  3.372236e-05  1.083773e+01  1.205987e+01  2.614429e+01 -1.361400e+01
lambda_2  8.009238e-05  1.205987e+01  9.463981e+01  7.631121e+01 -8.494856e+01
mu_12     1.091252e-04  2.614429e+01  7.631121e+01  9.443247e+01 -7.730811e+01
mu_21    -7.990764e-05 -1.361400e+01 -8.494856e+01 -7.730811e+01  1.050761e+02

The model running time is:

We calculate the 95% confidence interval.

# Standard errors from the inverse of the observed Fisher information
# (the Hessian of the negative log-likelihood at the optimum).
fisher_info <- solve(fitted_2MMPP$hessian)
if (any(diag(fisher_info) < 0)) {
  # A negative variance estimate signals a numerical problem (e.g. the
  # optimiser stopped near a ridge). Take the complex square root so the
  # script can continue, but flag it for inspection.
  warning("Negative variance estimate(s) in the inverse Hessian; ",
          "standard errors may be unreliable.", call. = FALSE)
  se <- sqrt(diag(fisher_info) + 0i)
} else {
  se <- sqrt(diag(fisher_info))
}

# 95% CI transformed (logit for psi, log for lambda)
upper95CI_fitted_2MMPP <- fitted_2MMPP$par + qnorm(.975) * se
lower95CI_fitted_2MMPP <- fitted_2MMPP$par - qnorm(.975) * se

# 50% CI transformed (logit for psi, log for lambda)
upper50CI_fitted_2MMPP <- fitted_2MMPP$par + qnorm(.75) * se
lower50CI_fitted_2MMPP <- fitted_2MMPP$par - qnorm(.75) * se

And we look at our estimates and add them to our result dataframe.

# Collect the 2-MMPP estimates into a one-row data frame. For each
# parameter we store: the estimate and SE on the transformed scale
# (logit for psi, log for the rates), plus the back-transformed point
# estimate and 95% / 50% confidence bounds.
# make_param_cols() builds the seven columns for one parameter, which
# avoids repeating the same block five times.
make_param_cols <- function(prefix, par_name, inv_link) {
  setNames(
    list(
      unname(fitted_2MMPP$par[par_name]),
      unname(se[par_name]),
      inv_link(unname(fitted_2MMPP$par[par_name])),
      inv_link(unname(lower95CI_fitted_2MMPP[par_name])),
      inv_link(unname(upper95CI_fitted_2MMPP[par_name])),
      inv_link(unname(lower50CI_fitted_2MMPP[par_name])),
      inv_link(unname(upper50CI_fitted_2MMPP[par_name]))
    ),
    paste0(prefix, "_",
           c("TransformedPointEstimate", "TransformedSE", "PointEstimate",
             "CI95lower", "CI95upper", "CI50lower", "CI50upper"))
  )
}

resDF <- data.frame(c(
  list(Model = "2-MMPP", Discretisation = "No discretisation"),
  # psi is back-transformed with the inverse logit, all rates with exp
  make_param_cols("psi", "psi", plogis),
  make_param_cols("lambda1", "lambda_1", exp),
  make_param_cols("lambda2", "lambda_2", exp),
  make_param_cols("mu12", "mu_12", exp),
  make_param_cols("mu21", "mu_21", exp)
))
cat(paste(colnames(resDF), resDF[1, ], sep = ": ", collapse = "\n"))
Model: 2-MMPP
Discretisation: No discretisation
psi_TransformedPointEstimate: 1.50441036232307
psi_TransformedSE: 0.781822988445301
psi_PointEstimate: 0.818231344999125
psi_CI95lower: 0.493016819768922
psi_CI95upper: 0.954207256666532
psi_CI50lower: 0.726528196092816
psi_CI50upper: 0.8840897043584
lambda1_TransformedPointEstimate: -4.78294476450616
lambda1_TransformedSE: 0.995922385227961
lambda1_PointEstimate: 0.00837131109556312
lambda1_CI95lower: 0.00118867412825875
lambda1_CI95upper: 0.0589554763519203
lambda1_CI50lower: 0.00427622697818252
lambda1_CI50upper: 0.016388009760998
lambda2_TransformedPointEstimate: 2.87040722958402
lambda2_TransformedSE: 0.31936893270487
lambda2_PointEstimate: 17.6442019780465
lambda2_CI95lower: 9.43527781654207
lambda2_CI95upper: 32.9950924069552
lambda2_CI50lower: 14.2249314062358
lambda2_CI50upper: 21.8853683403794
mu12_TransformedPointEstimate: -2.2908855713745
mu12_TransformedSE: 0.544556144954539
mu12_PointEstimate: 0.101176822870786
mu12_CI95lower: 0.0347979778013971
mu12_CI95upper: 0.294176562346545
mu12_CI50lower: 0.0700754255232265
mu12_CI50upper: 0.146081874063446
mu21_TransformedPointEstimate: 3.87504686033604
mu21_TransformedSE: 0.203585516009739
mu21_PointEstimate: 48.1849562014347
mu21_CI95lower: 32.3310019043005
mu21_CI95upper: 71.8131164325391
mu21_CI50lower: 42.0025578029422
mu21_CI50upper: 55.2773479897822
# Append the 2-MMPP results to the model-comparison table
ModelComparisonDF <- ModelComparisonDF %>%
  bind_rows(resDF)

Look at the results

# Reshape the comparison table: one row per (model, discretisation,
# parameter), with the transformed/back-transformed estimates and the CI
# bounds as columns.
long_estimates <- ModelComparisonDF %>%
  pivot_longer(
    cols = starts_with(c("psi", "p", "lambda", "mu")),
    names_to = c("Parameter", "Interval"),
    names_sep = "_"
  )
PublicationDF <- long_estimates %>%
  pivot_wider(names_from = "Interval", values_from = "value")
# Keep rows with at most one missing value column: a parameter that does
# not exist for a given model yields an all-NA row, which we drop.
PublicationDF <- PublicationDF %>%
  filter(rowSums(is.na(PublicationDF)) < 2)


# Display the comparison table, rounding all estimate columns to three
# decimals and fixing the ordering of the Model / Discretisation levels.
PublicationDF %>%
  mutate(
    across(
      c(TransformedPointEstimate, TransformedSE, PointEstimate,
        CI95lower, CI95upper, CI50lower, CI50upper),
      ~ round(.x, 3)
    ),
    Model = factor(Model, levels = c("BP", "COP", "PP", "IPP", "2-MMPP")),
    Discretisation = factor(
      Discretisation,
      levels = c("No discretisation", "Day", "Week", "Month")
    ),
    Parameter = as.factor(Parameter)
  ) %>%
  myDT(caption = "Occupancy models comparison on the lynx dataset")

Occupancy probability estimates

# Occupancy probability estimates for the discrete-time models (BP, COP):
# one panel per model, one estimate per discretisation level, with the 95%
# (lighter) and 50% (darker) confidence intervals and the point estimate.
plotPsiDiscrete <- ModelComparisonDF %>%
  filter(Model %in% c("BP", "COP")) %>%
  mutate(
    Model = factor(
      as.character(Model),
      levels = c("BP", "COP"),
      labels = c("BP", "COP"),
      ordered = TRUE
    ),
    # Order discretisations from coarsest to finest on the x axis
    Discretisation = factor(as.character(Discretisation), levels = c("Month", "Week", "Day"), ordered = TRUE)
  ) %>%
  ggplot(aes(group = interaction(Model, Discretisation))) +
  # 95% CI drawn first so the narrower 50% CI overlays it
  geom_segment(
    aes(
      y = psi_CI95lower,
      yend = psi_CI95upper,
      x = Discretisation,
      xend = Discretisation,
      colour = "95% confidence interval"
    ),
    linewidth = 2
  ) +
  geom_segment(
    aes(
      y = psi_CI50lower,
      yend = psi_CI50upper,
      x = Discretisation,
      xend = Discretisation,
      colour = "50% confidence interval"
    ),
    linewidth = 2
  ) +
  geom_point(
    aes(x = Discretisation, y = psi_PointEstimate, fill = "Point estimate"),
    size = 3,
    shape = 23,
    colour = "transparent"
  ) +
  facet_grid(. ~ Model,
             switch = "y") +
  scale_fill_manual(values = c("Point estimate" = "grey5"),
                    name = "") +
  scale_colour_manual(
    values = c(
      "95% confidence interval" = "grey80",
      "50% confidence interval" = "grey65"
    ),
    name = ""
  ) +
  # psi is a probability, so fix the axis to [0, 1]
  ylim(0, 1) +
  theme_minimal(base_size = 15) +
  labs(y = "Estimated occupancy probability") +
  scale_x_discrete(position = "top") +
  theme(
    strip.background.y = element_rect(fill = "gray93", colour = "white"),
    strip.text.y = element_text(colour = "black", face = "bold"),
    strip.background.x = element_rect(fill = "gray80", colour = "white"),
    strip.text.x = element_text(colour = "black", face = "bold"),
    axis.title.x = element_blank(),
    axis.text.x = element_text(hjust = .5),
    axis.text.y = element_text(hjust = .5),
    legend.position = "bottom",
    legend.title = element_text(hjust = 0.5),
    strip.placement = "outside",
    plot.title = element_blank(),
    plot.margin = unit(c(0, 0, 0, 0), "cm"),
    panel.border = element_rect(colour = "gray80", fill = "transparent"),
    panel.grid.major.x = element_blank()
  )

# Occupancy probability estimates for the continuous-time models
# (PP, IPP, 2-MMPP), styled to match plotPsiDiscrete; the y axis text and
# title are removed because this panel is combined with the discrete plot
# and shares its axis.
plotPsiContinuous <- ModelComparisonDF %>%
  filter(Model %in% c("PP", "IPP", "2-MMPP")) %>%
  mutate(Model = factor(
    as.character(Model),
    levels = c("PP", "IPP", "2-MMPP"),
    labels = c("PP", "IPP", "2-MMPP"),
    ordered = TRUE
  )) %>%
  ggplot(aes(group = interaction(Model, Discretisation))) +
  # 95% CI drawn first so the narrower 50% CI overlays it
  geom_segment(
    aes(
      y = psi_CI95lower,
      yend = psi_CI95upper,
      x = Discretisation,
      xend = Discretisation,
      colour = "95% confidence interval"
    ),
    linewidth = 2
  ) +
  geom_segment(
    aes(
      y = psi_CI50lower,
      yend = psi_CI50upper,
      x = Discretisation,
      xend = Discretisation,
      colour = "50% confidence interval"
    ),
    linewidth = 2
  ) +
  geom_point(
    aes(x = Discretisation, y = psi_PointEstimate, fill = "Point estimate"),
    size = 3,
    shape = 23,
    colour = "transparent"
  ) +
  facet_grid(. ~ Model,
             switch = "y") +
  scale_fill_manual(values = c("Point estimate" = "grey5"),
                    name = "") +
  scale_colour_manual(
    values = c(
      "95% confidence interval" = "grey80",
      "50% confidence interval" = "grey65"
    ),
    name = ""
  ) +
  # psi is a probability, so fix the axis to [0, 1]
  ylim(0, 1) +
  theme_minimal(base_size = 15) +
  labs(y = "Estimated occupancy probability") +
  scale_x_discrete(position = "top") +
  theme(
    strip.background.y = element_rect(fill = "gray93", colour = "white"),
    strip.background.x = element_rect(fill = "gray80", colour = "white"),
    strip.text.x = element_text(colour = "black", face = "bold"),
    axis.title.x = element_blank(),
    axis.text.x = element_text(hjust = .5),
    legend.position = "bottom",
    legend.title = element_text(hjust = 0.5),
    strip.placement = "outside",
    plot.title = element_blank(),
    plot.margin = unit(c(0, 0, 0, 0), "cm"),
    panel.border = element_rect(colour = "gray80", fill = "transparent"),
    panel.grid.major.x = element_blank(),
    # Remove y axis informations (shared with the discrete-time panel)
    strip.text.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank()
  )

# Combine the discrete- and continuous-time occupancy plots side by side,
# sharing a single legend at the bottom.
plotPsi <- ggpubr::ggarrange(
  plotPsiDiscrete,
  plotPsiContinuous,
  common.legend = TRUE,
  legend = "bottom",
  ncol = 2,
  widths = c(1, 0.7)
) +
  theme(plot.background = element_rect(fill = "white"))
print(plotPsi)

# Export the figure as PDF and JPEG. Note: the argument is `units`
# (spelled out here; `unit` only works through fragile partial matching).
ggsave(
  "./output/lynx_psi.pdf",
  plot = plotPsi,
  width = 25,
  height = 10,
  units = "cm"
)
jpeg(
  filename = "./output/lynx_psi.jpeg",
  width = 25,
  height = 10,
  units = "cm",
  res = 100
)
print(plotPsi)
dev.off()
png 
  2 

Detection parameters estimates

# Long format: one row per (model, discretisation, parameter), with the
# point estimate, SE and CI bounds spread back into columns.
ModelComparisonDFlong <- ModelComparisonDF %>%
  pivot_longer(
    cols = starts_with(c("psi", "p", "lambda", "mu")),
    names_to = c("Parameter", "Interval"),
    names_sep = "_"
  ) %>%
  pivot_wider(names_from = "Interval", values_from = "value")


# One figure per detection parameter (every parameter except psi), showing
# the point estimate with its 95% and 50% confidence intervals for each
# model / discretisation combination.
detecParams <- setdiff(unique(ModelComparisonDFlong$Parameter), "psi")
for (param in detecParams) {
  ggDetec <- ModelComparisonDFlong %>%
    filter(Parameter == param) %>%
    # Models without this parameter contribute NA rows; drop them
    filter(!is.na(PointEstimate)) %>%
    ggplot(aes(group = interaction(Model, Discretisation))) +
    # 95% CI drawn first so the narrower 50% CI overlays it
    geom_segment(
      aes(
        y = CI95lower,
        yend = CI95upper,
        x = Discretisation,
        xend = Discretisation,
        colour = "95% confidence interval"
      ),
      linewidth = 2
    ) +
    geom_segment(
      aes(
        y = CI50lower,
        yend = CI50upper,
        x = Discretisation,
        xend = Discretisation,
        colour = "50% confidence interval"
      ),
      linewidth = 2
    ) +
    geom_point(
      aes(x = Discretisation, y = PointEstimate, fill = "Point estimate"),
      size = 3,
      shape = 23,
      colour = "transparent"
    ) +
    facet_grid(. ~ Model,
               switch = "y",
               scales = "free") +
    scale_fill_manual(values = c("Point estimate" = "grey5"),
                      name = "") +
    scale_colour_manual(
      values = c(
        "95% confidence interval" = "grey80",
        "50% confidence interval" = "grey65"
      ),
      name = ""
    ) +
    theme_minimal(base_size = 15) +
    labs(y = paste("Estimated", param),
         title =  paste("Detection parameter:", param)) +
    scale_x_discrete(position = "top") +
    theme(
      strip.background.y = element_rect(fill = "gray93", colour = "white"),
      strip.text.y = element_text(colour = "black", face = "bold"),
      strip.background.x = element_rect(fill = "gray80", colour = "white"),
      strip.text.x = element_text(colour = "black", face = "bold"),
      axis.title.x = element_blank(),
      axis.text.x = element_text(hjust = .5),
      axis.text.y = element_text(hjust = .5),
      legend.position = "bottom",
      legend.title = element_text(hjust = 0.5),
      strip.placement = "outside",
      plot.title = element_text(hjust = 0.5, face = "bold"),
      plot.margin = unit(c(0, 0, 0, 0), "cm"),
      panel.border = element_rect(colour = "gray80", fill = "transparent"),
      panel.grid.major.x = element_blank()
    )
  print(ggDetec)
}